SDG14

Author

Ignacio Saldivia Gonzatti

Published

February 7, 2023

Import packages and set things up

Code
import numpy as np
import pandas as pd
import os
import csv
import re
import composite as ci
import inspect
from functools import reduce
import plotly.express as px
from sklearn.preprocessing import RobustScaler

# from xml.etree import ElementTree as ET
# import requests
# import json
# import webbrowser
Code
# for VSC users, Latex in Plotly is not working
# Workaround from https://github.com/microsoft/vscode-jupyter/issues/8131
import plotly
from IPython.display import display, HTML

# Register Plotly's offline notebook JS bundle, then inject MathJax 2.7.1
# manually so LaTeX labels render in the VS Code Jupyter front-end.
plotly.offline.init_notebook_mode()
display(
    HTML(
        '<script type="text/javascript" async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-MML-AM_SVG"></script>'
    )
)
Code
# show all output from cells
from IPython.core.interactiveshell import InteractiveShell

# Echo every top-level expression in a cell, not only the last one
# ("last_expr" is IPython's default).
InteractiveShell.ast_node_interactivity = "all"  # last_expr
# Render every DataFrame column instead of truncating wide tables.
pd.set_option("display.max_columns", None)
Code
# create output folder if not there
# exist_ok=True makes this idempotent and avoids the check-then-create race
# between os.path.exists and os.makedirs.
os.makedirs("../output", exist_ok=True)

Country names dict

Code
# load countries dictionary
# countryAbbr.csv has two columns: country name (index) and abbreviation;
# squeeze() turns the single remaining column into a Series before to_dict().
country_to_abbrev = (
    pd.read_csv("../data/countryAbbr.csv", header=None, index_col=0).squeeze().to_dict()
)
# invert the dictionary
abbrev_to_country = dict(map(reversed, country_to_abbrev.items()))
# add additional abbreviations
# (alternative codes not present in the CSV mapping)
abbrev_to_country["EL"] = "Greece"
abbrev_to_country["GB"] = "United Kingdom"
Code
# List of European country names from the reference file.
allEurope = pd.read_csv("../data/Countries-Europe.csv")["name"].to_list()
Code
# fmt: off
# Countries covered by this assessment.
countries = [
    'Belgium','Germany','Denmark','Estonia','Spain','Finland','France',
'Ireland','Lithuania','Latvia','Netherlands','Poland','Portugal','Sweden',
'United Kingdom'
]
# Wider EU reference list (plus the United Kingdom).
countriesEU = [
    "Belgium", "Bulgaria", "Cyprus", "Greece", "Germany", "Croatia",
    "Italy", "Denmark", "Estonia", "Spain", "Finland", "France",
    "Ireland", "Lithuania", "Latvia", "Malta", "Netherlands", "Poland",
    "Portugal", "Romania", "Sweden", "United Kingdom",]
# Abbreviations for `countries`; names without a mapping are kept unchanged.
countryAbb = [
    x if x not in country_to_abbrev else country_to_abbrev[x] for x in countries
]
# fmt: on

Define general functions

Code
def missingCountries(df, countries=countries):
    ### check missing countries in dataset
    # Print which of the expected countries are absent from df's index.
    present = df.index.unique()
    missing = [country for country in countries if country not in present]
    if missing:
        print("Missing countries:\n", "\n".join(missing))
    else:
        print("No missing countries")


def negativeValues(df):
    ### check negative values in dataset
    # Clamp negative entries to zero in place and report when that happens.
    mask = df < 0
    if mask.sum().sum() > 0:
        df[mask] = 0
        print("Negative values in dataset replaced with 0")

SDG Official Data

SDG14 metadata can be found here

Code
# SDG14 indicators from the UNstats
# https://unstats.un.org/sdgs/dataportal/database

sdg14 = pd.read_excel("../data/Goal14_april2023.xlsx", sheet_name=0)
# fmt: off
# Keep only the columns used downstream.
sdg14 = sdg14[
    [
        "Target", "Indicator", "SeriesCode", "GeoAreaName",
        "TimePeriod", "Value", "Units", "SeriesDescription"
    ]
]
# fmt: on

# "N" marks unavailable values in the source; recode to NaN and cast to float.
# Fix: explicit column assignment instead of attribute access with
# inplace=True — the chained form can silently operate on a copy
# (pandas chained-assignment pitfall) and is deprecated in recent pandas.
sdg14["Value"] = sdg14["Value"].replace("N", np.nan)
sdg14["Value"] = sdg14["Value"].astype(np.float64)

# filter countries
# sdg14 = sdg14[sdg14['GeoAreaName'].isin(countries)]
# show indicators
pd.DataFrame(
    sdg14.groupby(["Indicator", "SeriesCode", "SeriesDescription", "Units"]).size()
)
0
Indicator SeriesCode SeriesDescription Units
14.1.1 EN_MAR_BEALITSQ Beach litter per square kilometer (Number) NUMBER 611
EN_MAR_BEALIT_BP Beach litter originating from national land-based sources that ends in the beach (%) PERCENT 900
EN_MAR_BEALIT_BV Beach litter originating from national land-based sources that ends in the beach (Tonnes) TONNES 900
EN_MAR_BEALIT_EXP Exported beach litter originating from national land-based sources (Tonnes) TONNES 900
EN_MAR_BEALIT_OP Beach litter originating from national land-based sources that ends in the ocean (%) PERCENT 900
EN_MAR_BEALIT_OV Beach litter originating from national land-based sources that ends in the ocean (Tonnes) TONNES 900
EN_MAR_CHLANM Chlorophyll-a anomaly, remote sensing (%) PERCENT 2784
EN_MAR_CHLDEV Chlorophyll-a deviations, remote sensing (%) PERCENT 4131
EN_MAR_PLASDD Floating plastic debris density (count per km2) NUMBER 3
14.2.1 EN_SCP_ECSYBA Number of countries using ecosystem-based approaches to managing marine areas (1 = YES; 0 = NO) NUMBER 33
14.3.1 ER_OAW_MNACD Average marine acidity (pH) measured at agreed suite of representative sampling stations PH 903
14.4.1 ER_H2O_FWTL Proportion of fish stocks within biologically sustainable levels (not overexploited) (%) PERCENT 422
14.5.1 ER_MRN_MPA Average proportion of Marine Key Biodiversity Areas (KBAs) covered by protected areas (%) PERCENT 6049
14.6.1 ER_REG_UNFCIM Progress by countries in the degree of implementation of international instruments aiming to combat illegal, unreported and unregulated fishing (level of implementation: 1 lowest to 5 highest) NUMBER 882
14.7.1 EN_SCP_FSHGDP Sustainable fisheries as a proportion of GDP PERCENT 5880
14.a.1 ER_RDE_OSEX National ocean science expenditure as a share of total research and development funding (%) PERCENT 159
14.b.1 ER_REG_SSFRAR Degree of application of a legal/regulatory/policy/institutional framework which recognizes and protects access rights for small-scale fisheries (level of implementation: 1 lowest to 5 highest) NUMBER 882
14.c.1 ER_UNCLOS_IMPLE Score for the implementation of UNCLOS and its two implementing agreements (%) PERCENT 63
ER_UNCLOS_RATACC Score for the ratification of and accession to UNCLOS and its two implementing agreements (%) PERCENT 63

Check official data by indicator

Code
# check sdg indicator, change SeriesCode to the one you want to check
# Example with Average marine acidity (pH) ER_OAW_MNACD
# Normalise the UN's long UK name to "United Kingdom": the lookahead regex
# matches any GeoAreaName containing both "United" and "Kingdom".
sdg14.loc[
    sdg14["GeoAreaName"].str.contains(r"^(?=.*United)(?=.*Kingdom)"), "GeoAreaName"
] = "United Kingdom"
sdg14check = sdg14[sdg14["GeoAreaName"].isin(countries)]
# Wide table: one row per country, one column per year (mean over duplicates).
sdg14check = sdg14check[sdg14check["SeriesCode"] == "ER_OAW_MNACD"].pivot_table(
    columns="TimePeriod", index="GeoAreaName", values="Value", aggfunc="mean"
)
mr = sdg14check.columns[-1]  # most recent year
sdg14check[[2012, 2016, mr]]
missingCountries(sdg14check)
TimePeriod 2012 2016 2021
GeoAreaName
Belgium 8.161000 8.193125 7.8710
Estonia NaN NaN 7.7255
Finland NaN NaN NaN
France 8.078667 8.078500 NaN
Netherlands NaN NaN NaN
Spain 8.004000 7.996000 NaN
Sweden 8.137500 8.171750 NaN
United Kingdom 8.071250 8.071333 NaN
Missing countries:
 Germany
Denmark
Ireland
Lithuania
Latvia
Poland
Portugal

14.1

(a) Index of coastal eutrophication

We use Gross Nitrogen Balance: Max-Min transformation where the max-value is the maximum value in the period 2012-2021 and the min-value is zero.

Gross nutrient balance (Nitrogen kg/ha), Eurostat

Source

Code
# Gross nutrient balance (BAL_UAA, Nitrogen kg/ha), Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/AEI_PR_GNB__custom_153613/

nitro = pd.read_csv("../data/aei_pr_gnb_page_linear.csv")
# Replace Eurostat geo codes with full country names where a mapping exists.
nitro["geo"] = nitro["geo"].map(abbrev_to_country).fillna(nitro["geo"])
nitro = nitro[nitro["geo"].isin(countries)]
# Wide table: rows = country, columns = year.
nitro = nitro.pivot_table(
    columns="TIME_PERIOD", index="geo", values="OBS_VALUE", aggfunc="mean"
)
# convert negative values to 0
negativeValues(nitro)
mr = nitro.columns[-1]  # most recent year
# We use the most recent year to impute missing values
# NOTE(review): ffill propagates the last available *earlier* year forward,
# not the most recent year backwards — confirm the comment above matches intent.
nitro.ffill(axis=1, inplace=True)
# maxMin transformation as (max-x)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
nitro = (
    (nitro.loc[:, "2012":mr].max().max() - nitro.loc[:, "2012":mr])
    / (nitro.loc[:, "2012":mr].max().max() - nitro.loc[:, "2012":mr].min().min())
    * 100
).round(2)
# nitro.ffill(axis=1, inplace=True)
nitro[[2012, 2016, mr]]

missingCountries(nitro)
TIME_PERIOD 2012 2016 2019
geo
Belgium 28.81 34.86 34.86
Denmark 61.33 63.18 63.18
Estonia 91.50 91.50 91.50
Finland 80.88 80.94 82.95
France 93.95 80.88 86.06
Germany 65.85 69.14 77.67
Ireland 87.64 77.29 72.82
Latvia 93.68 92.92 100.00
Lithuania 90.85 93.14 84.53
Netherlands 14.16 0.82 16.45
Poland 80.56 82.73 80.94
Portugal 82.84 81.32 82.14
Spain 88.45 85.51 79.90
Sweden 89.76 86.76 92.54
United Kingdom 59.10 59.64 59.86
No missing countries
Marine waters affected by eutrophication

Source

Code
# Marine waters affected by eutrophication (km2 and % EEZ), Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/sdg_14_60/default/table?lang=en

eutro = pd.read_csv("../data/sdg_14_60_linear.csv")
# Replace Eurostat geo codes with full country names where a mapping exists.
eutro["geo"] = eutro["geo"].map(abbrev_to_country).fillna(eutro["geo"])
eutro = eutro[eutro["geo"].isin(countries) & (eutro["unit"] == "PC")]  # KM2 for area
eutro.set_index(["geo", "TIME_PERIOD"], inplace=True)
# Wide table: rows = country, columns = year.
eutro = eutro.pivot_table(
    columns="TIME_PERIOD", index="geo", values="OBS_VALUE", aggfunc="mean"
)

# Fix: compute the most-recent-year marker from THIS dataset *before* it is
# used in the slice below — previously `mr` was only assigned after the
# transformation, so the value silently leaked in from the preceding cell.
mr = eutro.columns[-1]  # most recent year

# maxMin transformation as (max-x)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
eutro = (
    (eutro.loc[:, "2012":mr].max().max() - eutro.loc[:, "2012":mr])
    / (eutro.loc[:, "2012":mr].max().max() - eutro.loc[:, "2012":mr].min().min())
    * 100
).round(2)

# UK missing. We will use the average of the other countries each year
eutro.loc["United Kingdom"] = eutro.mean()

eutro[[2012, 2016, mr]]
missingCountries(eutro)
TIME_PERIOD 2012 2016 2019
geo
Belgium 100.000000 100.000000 100.000000
Denmark 100.000000 99.260000 100.000000
Estonia 90.040000 93.360000 87.820000
Finland 90.040000 86.350000 78.970000
France 100.000000 98.890000 97.050000
Germany 100.000000 100.000000 100.000000
Ireland 100.000000 100.000000 100.000000
Latvia 96.680000 95.200000 95.200000
Lithuania 99.260000 92.620000 94.100000
Netherlands 100.000000 100.000000 100.000000
Poland 98.520000 96.310000 92.250000
Portugal 80.810000 89.670000 90.410000
Spain 93.730000 93.360000 78.600000
Sweden 97.420000 91.880000 82.660000
United Kingdom 96.178571 95.492857 92.647143
No missing countries

(b) Floating Plastic Debris Density

We use two indicators:

  1. Plastic Waste kg/ha: Max-Min Transformation where the max-value and min-values are the maximum and minimum in the period 2012-2021.

  2. Recovery Rate of Plastic Packaging: used without further transformation, as the score is already provided as a dimensionless percentage.

1. Plastic Waste kg/ha, Eurostat

Source

Code
# Plastic Waste kg/ha, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/ENV_WASGEN/

wasteG = pd.read_csv("../data/env_wasgen.csv")
# Replace Eurostat geo codes with full country names where a mapping exists.
wasteG["geo"] = wasteG["geo"].map(abbrev_to_country).fillna(wasteG["geo"])
wasteG = wasteG[wasteG["geo"].isin(countries)]

# Keep per-inhabitant (KG_HAB) figures; rows = country, columns = year.
wasteG = wasteG[wasteG["unit"] == "KG_HAB"].pivot_table(
    columns="TIME_PERIOD", index="geo", values="OBS_VALUE", aggfunc="mean"
)
mr = wasteG.columns[-1]  # most recent year
negativeValues(wasteG)
# maxMin transformation as (max-x)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
wasteG = (
    (wasteG.loc[:, "2012":mr].max().max() - wasteG.loc[:, "2012":mr])
    / (wasteG.loc[:, "2012":mr].max().max() - wasteG.loc[:, "2012":mr].min().min())
    * 100
).round(2)
# fill UK 2020 with 2019 value
wasteG = wasteG.ffill(axis=1)
wasteG[[2012, 2016, mr]]
missingCountries(wasteG)
TIME_PERIOD 2012 2016 2020
geo
Belgium 57.52 53.98 33.63
Denmark 92.92 92.04 88.50
Estonia 94.69 80.53 81.42
Finland 94.69 95.58 88.50
France 87.61 84.07 77.88
Germany 82.30 80.53 76.99
Ireland 86.73 82.30 74.34
Latvia 100.00 81.42 57.52
Lithuania 94.69 82.30 74.34
Netherlands 79.65 82.30 76.99
Poland 87.61 79.65 57.52
Portugal 94.69 83.19 71.68
Spain 88.50 95.58 92.92
Sweden 93.81 81.42 80.53
United Kingdom 76.99 69.91 70.80
No missing countries
2. Recovery rates for packaging waste, Plastic Packaging

Source

Code
# Recovery rates for packaging waste, Plastic Packaging, Percentage, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/ten00062/

wasteR = pd.read_csv("../data/ten00062.csv")
# Replace Eurostat geo codes with full country names where a mapping exists.
wasteR["geo"] = wasteR["geo"].map(abbrev_to_country).fillna(wasteR["geo"])
wasteR = wasteR[wasteR["geo"].isin(countries)]
# Wide table: rows = country, columns = year. Values are already percentages,
# so no further transformation is applied.
wasteR = wasteR.pivot_table(
    columns="TIME_PERIOD", index="geo", values="OBS_VALUE", aggfunc="mean"
)
negativeValues(wasteR)
mr = wasteR.columns[-1]  # most recent year
# fill empty values with last available year
wasteR = wasteR.ffill(axis=1)
wasteR[[2012, 2016, mr]]
missingCountries(wasteR)
TIME_PERIOD 2012 2016 2020
geo
Belgium 92.7 99.5 99.4
Denmark 99.4 98.1 73.1
Estonia 44.0 85.8 87.4
Finland 51.0 97.2 99.4
France 64.0 64.5 71.8
Germany 99.7 99.8 99.9
Ireland 74.3 79.7 100.0
Latvia 38.7 41.8 47.9
Lithuania 38.9 74.4 63.4
Netherlands 98.1 95.8 94.9
Poland 26.2 54.8 45.0
Portugal 39.4 49.9 56.9
Spain 53.2 61.8 55.5
Sweden 58.1 61.7 50.7
United Kingdom 38.1 58.5 56.2
No missing countries

14.2

Proportion of national exclusive economic zones managed using ecosystem-based approaches

We use two indicators:

  1. Progress of implementation of Maritime Spatial Planning. Categorical data

  2. OHI 'Artisanal Opportunities' Index: the Ocean Health Index goal used in the code below (status dimension).

1. Progress of implementation of Maritime Spatial Planning

Source

Code
# Maritime Spatial Planning status per country and reference year.
msp = pd.read_csv("../data/mspData.csv")
# Ordinal scoring of the categorical MSP status (higher = further along).
dictScore = {
    "implemented": 4,
    "complete not implemented": 3,
    "under development": 2,
    "not started": 1,
}
Code
# Map the categorical MSP status to ordinal scores for the three years kept.
mspVal = (
    msp[["Country", "2012", "2016", "2022"]]
    .set_index("Country")
    .replace(dictScore)
    # .melt(ignore_index=False)
    # .rename(columns={"variable": "Year", "value": "Score"})
    # .set_index("Year", append=True)
    # .sort_index()
)
# Rescale the 1-4 ordinal scores onto a 25-100 scale.
mspVal = mspVal * 1 / len(dictScore) * 100
mspVal
missingCountries(mspVal)
2012 2016 2022
Country
Belgium 50.0 100.0 100.0
Germany 100.0 100.0 100.0
Denmark 25.0 25.0 100.0
Estonia 25.0 50.0 100.0
Spain 25.0 25.0 75.0
Finland 25.0 25.0 100.0
France 25.0 25.0 100.0
Ireland 25.0 25.0 100.0
Lithuania 50.0 75.0 100.0
Latvia 50.0 50.0 100.0
Netherlands 100.0 100.0 100.0
Poland 25.0 50.0 100.0
Portugal 25.0 50.0 100.0
Sweden 25.0 50.0 100.0
United Kingdom 25.0 50.0 100.0
No missing countries
2. OHI Artisanal Opportunities

Source

Code
# OHI 'Artisanal opportunities' Index
# https://oceanhealthindex.org/global-scores/data-download/
# NOTE(review): surrounding headings refer to OHI "Habitat"/"Biodiversity",
# but the code filters the 'Artisanal opportunities' goal — confirm which
# indicator is intended.

ohiBio = pd.read_csv("../data/scoresOHI.csv")
ohiBio = ohiBio[ohiBio["region_name"].isin(countries)]
# Keep the goal's "status" dimension only.
ohiBio = ohiBio[
    (ohiBio.long_goal == "Artisanal opportunities") & (ohiBio.dimension == "status")
]
# Wide table: rows = country, columns = scenario year.
ohiBio = ohiBio.pivot_table(
    columns="scenario", index="region_name", values="value", aggfunc="mean"
)

ohiBio[[2012, 2018, 2022]]
missingCountries(ohiBio)
scenario 2012 2018 2022
region_name
Belgium 79.51 77.65 78.66
Denmark 70.50 77.24 74.63
Estonia 89.39 91.18 99.24
Finland 84.67 82.73 77.45
France 77.09 76.44 78.61
Germany 73.21 76.62 73.23
Ireland 69.90 73.46 73.52
Latvia 87.40 89.29 99.01
Lithuania 88.71 90.62 97.63
Netherlands 55.01 57.44 60.35
Poland 87.82 76.21 77.61
Portugal 77.81 65.46 66.08
Spain 73.45 70.66 73.08
Sweden 94.77 95.73 97.37
United Kingdom 81.35 82.74 79.79
No missing countries

14.3

Average marine acidity (pH) measured at agreed suite of representative sampling stations

We use two indicators:

  1. Reconstructed sea surface pH (Copernicus, GLODAPv2.2021): Max-Min transformation where the max- and min-values are the maximum and minimum in the assessment period (2012-2021)

  2. Carbon Emissions per capita: Max-Min Transformation where the max-value and min-values are the maximum and minimum in the period 2012-2021.

Reconstructed estimates of sea surface pH assessed with GLODAPv2.2021

Source Copernicus

Code
# Reconstructed sea-surface pH per country and year (Copernicus).
phCopernicus = pd.read_csv("../data/phCopernicus.csv", index_col=0)
# Year columns arrive as strings; cast to int.
phCopernicus.columns = phCopernicus.columns.astype(int)
mr = phCopernicus.columns[-1]  # most recent year
# maxMin transformation as (x - min)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
# Fix: slice with the integer 2012 — the columns were just cast to int above,
# so the string label "2012" does not match them (the sibling CO2 cell also
# slices with integer years).
phScore = (
    (phCopernicus.loc[:, 2012:mr] - phCopernicus.loc[:, 2012:mr].min().min())
    / (
        phCopernicus.loc[:, 2012:mr].max().max()
        - phCopernicus.loc[:, 2012:mr].min().min()
    )
    * 100
).round(2)
phScore
missingCountries(phScore)
2012 2016 2021
Belgium 85.73 83.78 82.31
Germany 77.66 77.21 71.94
Denmark 83.36 82.50 77.90
Estonia 50.37 68.56 50.21
Spain 92.14 86.99 83.67
Finland 57.06 66.56 54.93
France 95.11 89.92 86.05
Ireland 100.00 94.54 89.66
Lithuania 22.48 41.72 26.98
Latvia 27.62 53.85 32.10
Netherlands 93.66 88.13 84.75
Poland 0.00 25.85 3.57
Portugal 93.06 89.27 84.02
Sweden 42.63 57.54 44.91
United Kingdom 97.09 92.37 87.64
No missing countries

2. Carbon emissions per capita

Several sources (see code)

Code
# Population on 1 January, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/tps00001/

pop = pd.read_csv("../data/tps00001.csv")
# Replace Eurostat geo codes with full country names where a mapping exists.
pop["geo"] = pop["geo"].map(abbrev_to_country).fillna(pop["geo"])
pop = pop[pop["geo"].isin(countries)]
# Wide table: rows = country, columns = year.
pop = pop.pivot_table(
    columns="TIME_PERIOD", index="geo", values="OBS_VALUE", aggfunc="mean"
)
Code
# Data to corroborate the ENV_AIR_GGE data. Includes UK
# Fossil CO2 emissions by country (territorial), million tonnes of carbon per year (1MtC = 1 million tonne of carbon = 3.664 million tonnes of CO2)
# https://globalcarbonbudget.org/carbonbudget/

co2T = pd.read_excel(
    "../data/National_Fossil_Carbon_Emissions_2022v1.0.xlsx", sheet_name=1, skiprows=11
)
# Reshape so countries are rows and the (integer) years are columns.
co2T = co2T.T
co2T.columns = co2T.iloc[0, :]
co2T.columns = co2T.columns.astype(int)
co2T = co2T.rename_axis(index="geo", columns=None)
co2T = co2T[co2T.index.isin(countries)]
mr = co2T.columns[-1]  # most recent year
co2T = co2T * 3.664  # convert from carbon to co2
co2pc = co2T / pop * 1000_000  # tonnes co2 per capita
# maxMin transformation as (max-x)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
co2pc = (
    (co2pc.loc[:, 2012:mr].max().max() - co2pc.loc[:, 2012:mr])
    / (co2pc.loc[:, 2012:mr].max().max() - co2pc.loc[:, 2012:mr].min().min())
    * 100
).round(2)
co2pc = co2pc.ffill(axis=1)  # fill empty values with last available year
co2pc[[2012, 2016, mr]]

missingCountries(co2pc)
2012 2016 2021
geo
Belgium 49.53 53.33 57.88
Denmark 67.81 73.53 85.95
Estonia 13.02 13.75 61.61
Finland 47.54 55.14 70.86
France 82.44 86.35 90.68
Germany 41.78 45.19 59.36
Ireland 57.48 55.63 64.73
Latvia 98.05 98.19 96.65
Lithuania 88.46 89.81 86.82
Netherlands 44.28 45.44 59.73
Poland 55.26 55.61 54.39
Portugal 88.79 87.58 95.57
Spain 78.08 81.07 87.13
Sweden 86.97 91.69 100.00
United Kingdom 63.17 76.84 87.67
No missing countries

14.4

Proportion of fish stocks within biologically sustainable levels

We use two indicators:

  1. FMSY/F: catch-weighted average

  2. B/BMSY: catch-weighted average

1. FMSY/F

Data compiled partially manually

Source of FMSY and F is /stockAssesmentYEAR

Source of country catches is /OfficialNominalCatches and stockAssessment[“year”] PDFs.

Code
# ICES stock-assessment data: catch-weighted Fref/F ratios per country/year.
fmsyF = pd.read_csv("../data/ices_data.csv", index_col=0, header=[0, 1])
# Keep the Fref/F column group, expressed as a percentage.
fmsyF = fmsyF[["Fref/F"]] * 100
fmsyF = fmsyF.droplevel(0, axis=1)
# most recent assessment is for 2022
fmsyF.rename(columns={"most recent": 2022}, inplace=True)
fmsyF = fmsyF[fmsyF.index.isin(countries)]
fmsyF
# maxMin transformation as (x - min)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
fmsyF = (
    (fmsyF - fmsyF.min().min())
    / (
        fmsyF.max().max()
        - fmsyF.min().min()
    )
    * 100
).round(2)
fmsyF
missingCountries(fmsyF)
Year 2012 2016 2022
Belgium 84.278853 89.253671 97.182818
Estonia 95.038299 82.573446 77.562646
Finland 99.177365 91.229736 82.840916
France 91.013767 93.548510 93.938257
Germany 81.283936 87.261924 90.831432
Ireland 71.053790 62.685203 86.770330
Latvia 94.318542 89.406109 81.381134
Lithuania 91.787307 83.126001 82.901621
Netherlands 85.883804 88.278969 93.834910
Poland 89.263580 89.580011 74.853470
Portugal 78.732711 84.161276 94.842807
Spain 92.064992 88.531210 91.937691
Sweden 91.475417 78.225767 74.962550
United Kingdom 98.135098 95.872798 90.761277
Denmark 95.419499 85.849679 88.529445
Year 2012 2016 2022
Belgium 59.17 72.81 94.53
Estonia 88.66 54.50 40.77
Finland 100.00 78.22 55.23
France 77.63 84.58 85.64
Germany 50.97 67.35 77.13
Ireland 22.93 0.00 66.00
Latvia 86.69 73.22 51.23
Lithuania 79.75 56.01 55.40
Netherlands 63.57 70.13 85.36
Poland 72.83 73.70 33.34
Portugal 43.98 58.85 88.12
Spain 80.51 70.83 80.16
Sweden 78.89 42.59 33.64
United Kingdom 97.14 90.94 76.94
Denmark 89.70 63.48 70.82
No missing countries

2. B/BMSY

Data collected partially manually

Source of BMSY and B is /stockAssesmentYEAR

Source of country catches is /OfficialNominalCatches and stockAssessment[“year”] PDFs.

Code
# ICES stock-assessment data: catch-weighted B/Bref ratios per country/year.
bBmsy = pd.read_csv("../data/ices_data.csv", index_col=0, header=[0, 1])
# Keep the B/Bref column group, expressed as a percentage.
bBmsy = bBmsy[["B/Bref"]] * 100
bBmsy = bBmsy.droplevel(0, axis=1)
# most recent assessment is for 2022
bBmsy.rename(columns={"most recent": 2022}, inplace=True)
bBmsy = bBmsy[bBmsy.index.isin(countries)]
bBmsy
# maxMin transformation as (x - min)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
bBmsy = (
    (bBmsy - bBmsy.min().min())
    / (
        bBmsy.max().max()
        - bBmsy.min().min()
    )
    * 100
).round(2)
bBmsy
missingCountries(bBmsy)
Year 2012 2016 2022
Belgium 91.982255 94.662804 91.871386
Estonia 99.839423 99.933347 95.679809
Finland 99.843072 99.941251 95.909145
France 97.115681 96.936702 96.369335
Germany 90.088417 93.281519 93.439957
Ireland 92.130861 85.437192 89.825424
Latvia 97.564685 99.565412 97.360783
Lithuania 92.382483 99.191985 96.254817
Netherlands 92.736442 89.856357 96.706294
Poland 96.568884 98.135256 92.284320
Portugal 98.746282 97.786405 98.794184
Spain 99.443829 96.930800 96.504370
Sweden 96.430034 97.171833 91.413094
United Kingdom 96.715727 95.636901 97.260299
Denmark 96.031151 95.816545 95.730634
Year 2012 2016 2022
Belgium 45.13 63.61 44.36
Estonia 99.30 99.95 70.62
Finland 99.32 100.00 72.20
France 80.52 79.28 75.37
Germany 32.07 54.08 55.18
Ireland 46.15 0.00 30.26
Latvia 83.61 97.41 82.21
Lithuania 47.89 94.83 74.58
Netherlands 50.33 30.47 77.70
Poland 76.75 87.55 47.21
Portugal 91.76 85.14 92.09
Spain 96.57 79.24 76.30
Sweden 75.79 80.91 41.20
United Kingdom 77.76 70.32 81.52
Denmark 73.04 71.56 70.97
No missing countries

14.5

Coverage of protected areas in relation to marine area

We consider two indicators:

  1. Coverage of protected areas in relation to marine areas: Distance to Reference transformation where the max-value is set to 30% and the min-value is 0%
  2. OHI ‘Biodiversity’ Index : No further transformation

1. Marine protected areas (% of territorial waters)

Source

Code
# EEZ area
# Data: https://emodnet.ec.europa.eu/geonetwork/srv/eng/catalog.search#/metadata/d4a3fede-b0aa-485e-b4b2-77e8e3801fd0
# https://www.europarl.europa.eu/factsheets/en/sheet/100/outermost-regions-ors-
# Outermost island territories whose EEZ is counted toward the parent country.
outermost = [
    # Portugal
    "Azores",
    "Madeira",
    # Spain
    "Canary Islands",
    # we could include outermost regions of France, but we care EEZs in
    # the Atlantic, Baltic and North Sea
]
eez = pd.read_csv(
    "../data/EMODnet_EEZ_v11_20210506.csv",
    delimiter=";",
    encoding="latin-1",
)

eez = eez[eez["Territory"].isin(outermost + countries)]
# Collapse any multi-part country labels containing these names onto the
# plain country name so they aggregate together below.
for country in ["France", "Portugal", "Spain"]:
    eez.loc[eez["Country"].str.contains(country), "Country"] = country
# NOTE(review): errors="ignore" for to_numeric is deprecated in recent pandas
# releases — confirm before upgrading the pandas version.
eez = eez.apply(pd.to_numeric, errors="ignore")
# Total EEZ area per country (km2).
eez = eez[["Country", "Area_km2"]].groupby(["Country"]).sum(numeric_only=True)
eez = eez[eez.index.isin(countries)]
eez
Area_km2
Country
Belgium 3495
Denmark 105021
Estonia 36451
Finland 81553
France 345240
Germany 56763
Ireland 427039
Latvia 28353
Lithuania 6832
Netherlands 64328
Poland 29982
Portugal 1728718
Spain 1007673
Sweden 155347
United Kingdom 731309
Code
# https://www.eea.europa.eu/data-and-maps/dashboards/natura-2000-barometer (in Barometer table)
# Load every yearly Natura 2000 barometer export, tagging each row with the
# year parsed from the file name, and stack them into one frame.
natura2k = pd.DataFrame()
for root, dirs, files in os.walk("../data/natura2k"):
    for file in files:
        if file.endswith(".csv"):
            tempDF = pd.read_csv(
                os.path.join(root, file), sep="\t", encoding="utf-16", header=1
            )
            tempDF["year"] = re.findall(r"\d+", file[:-4])[0]
            # Fix: concatenate inside the .csv branch — previously this line
            # ran for every file, duplicating the last frame for non-CSV
            # entries and raising NameError if a non-CSV file came first.
            natura2k = pd.concat([natura2k, tempDF], axis=0).reset_index(drop=True)

# Keep only the marine-related columns plus the year tag.
natura2k = natura2k.loc[:, natura2k.columns.str.contains("marine|year|Unnamed")]
# The first data row carries the real header names for the marine columns.
natura2k.columns = np.concatenate([natura2k.iloc[0, :-1], natura2k.columns[-1:]])
natura2k = natura2k.iloc[1:].reset_index(drop=True)
natura2k.columns = natura2k.columns.str.strip()
natura2k = natura2k.set_index("Country")
natura2k = natura2k[natura2k.index.isin(countries)]
natura2kEEZ = natura2k.merge(eez, left_index=True, right_index=True)
natura2kEEZ = natura2kEEZ.apply(pd.to_numeric, errors="ignore")
# Share of each country's EEZ covered by Natura 2000 marine sites.
natura2kEEZ["naturaEEZ"] = natura2kEEZ["Natura 2000"] / natura2kEEZ["Area_km2"] * 100
# target of 30% MPAs of EEZ
natura2kEEZ["Score"] = 100 * (natura2kEEZ.naturaEEZ) / 30
natura2kEEZ.loc[natura2kEEZ["Score"] > 100, ["Score"]] = 100
natura2kEEZ = natura2kEEZ.pivot_table(index="Country", columns="year", values="Score")
# Use 2019 value for UK
natura2kEEZ.ffill(axis=1, inplace=True)
natura2kEEZ[[2012, 2016, 2021]]
missingCountries(natura2kEEZ)
year 2012 2016 2021
Country
Belgium 100.000000 100.000000 100.000000
Denmark 60.473620 60.473620 60.473620
Estonia 61.763280 61.763280 61.763280
Finland 29.183476 29.183476 33.278972
France 40.225157 40.247364 100.000000
Germany 100.000000 100.000000 100.000000
Ireland 5.360166 8.007856 8.005514
Latvia 20.315311 51.575965 51.587721
Lithuania 32.933255 76.258782 76.258782
Netherlands 61.139784 78.156738 85.271318
Poland 80.459387 80.448269 80.559447
Portugal 0.507891 6.148101 8.182171
Spain 3.515029 27.920433 28.009086
Sweden 20.023989 43.406052 43.485444
United Kingdom 33.704859 57.729359 60.225340
No missing countries

Average proportion of Marine Key Biodiversity Areas (KBAs) covered by protected areas (%)

Source: official SDG 14 data

Code
# Official SDG series ER_MRN_MPA: average KBA coverage by protected areas (%).
sdg14check = sdg14[sdg14["GeoAreaName"].isin(countries)]
# Wide table: rows = country, columns = year (mean over duplicates).
kbaMPA = sdg14check[sdg14check["SeriesCode"] == "ER_MRN_MPA"].pivot_table(
    columns="TimePeriod", index="GeoAreaName", values="Value", aggfunc="mean"
)
kbaMPA[[2012, 2016, 2022]]
missingCountries(kbaMPA)
TimePeriod 2012 2016 2022
GeoAreaName
Belgium 96.49980 96.84682 96.85317
Denmark 86.66340 86.74673 86.74673
Estonia 97.60969 97.60982 97.62768
Finland 59.78557 60.80889 60.85425
France 62.51162 76.43594 81.88238
Germany 74.06325 74.51850 80.79027
Ireland 75.92082 75.97923 83.16451
Latvia 96.16348 96.16360 96.16360
Lithuania 65.53137 83.51997 83.51997
Netherlands 93.31337 96.64630 96.64630
Poland 87.25113 87.25113 87.31556
Portugal 68.28585 70.43105 70.76779
Spain 62.51987 85.84322 85.86279
Sweden 56.67376 59.73273 60.46026
United Kingdom 80.23372 80.43096 81.23336
No missing countries

14.6

Degree of implementation of international instruments aiming to combat illegal, unreported and unregulated fishing

We use two indicators:

  1. Fishing Subsidies relative to Landings: Max-Min Transformation where the max-value and min-value are the maximum and minimum in the assessment period (2012-2021)
  2. TAC/Catch:

1. Fishing Subsidies relative to Landings

Source

Code
# selection of subsidies by level of risk to fishery sustainability
# as per https://stats.oecd.org/Index.aspx?QueryId=121718
# OECD FSE categories treated as high-risk for fishery sustainability.
highRisk = [
    "I.E.1. Fuel tax concessions",
    "I.A.1. Transfers based on variable input use",
    "I.A.2.1.Support to vessel construction/purchase",
    "I.A.2.2.Support to modernisation",
    "I.A.2.3.Support to other fixed costs",
    "II.A. Access to other countries’ waters",
    "I.B.2. Special insurance system for fishers",
]

# Categories with moderate sustainability risk.
moderateRisk = [
    "I.B.1. Income support",
    "I.C. Transfers based on the reduction of productive capacity",
    "II.B.1. Capital expenditures",
    "II.B.2. Subsidized access to infrastructure",
]

# Categories whose sustainability impact is uncertain.
uncertainRisk = [
    "I.D. Miscellaneous direct support to individuals and companies",
    "I.E.2. Other tax exemptions",
    "II.D. Support to fishing communities",
    "II.C. Marketing and promotion",
    "II.E. Education and training",
    "II.F. Research and development",
    "II.H. Miscellaneous support for services to the sector",
]


# Categories regarded as benign (management/enforcement spending).
noRisk = [
    "II.G.1. Management expenditures",
    "II.G.2. Stock enhancement programs",
    "II.G.3. Enforcement expenditures",
]
Code
# Fisheries Support Estimate
# https://stats.oecd.org/Index.aspx?DataSetCode=FISH_FSE
# selection as per https://stats.oecd.org/Index.aspx?QueryId=121718

fse = pd.read_csv("../data/FISH_FSE.csv")
fse = fse[fse["Country"].isin(countries)]

# strip variable codes and delete parent variables to avoid double counting
# solution from https://stackoverflow.com/q/76183612/14534411

separator = "."  # NOTE(review): unused — kept for reference
# Drop the trailing label segment, keeping the hierarchical code prefix
# (e.g. "I.A.1. Transfers ..." -> "I.A.1").
fse["vCode"] = fse.Variable.str.rsplit(".", n=1).str.get(0)

variableAll = fse.vCode.unique().tolist()


def is_parent(p, target):
    """Return True when code ``p`` lies strictly below ``target`` in the
    dot-separated hierarchy, i.e. ``p`` begins with ``target`` immediately
    followed by a ``.`` separator."""
    return p.startswith(target + ".")


# Keep only leaf codes: a code is dropped when some other code extends it, so
# parent aggregates are not double-counted together with their sub-items.
# The previous implementation scanned adjacent pairs of the sorted code list,
# which silently misses a parent whenever a sibling code sorts between the
# parent and its first child (any character ordering before "."); the
# all-pairs check below makes no ordering assumption.  Non-string entries
# (e.g. NaN codes) are skipped here and handled separately downstream.
# The order of vSupport is not significant — it is only used with .isin().
codes = [c for c in variableAll if isinstance(c, str)]
vSupport = [c for c in codes if not any(is_parent(other, c) for other in codes)]

# keep leaf variables plus rows whose code could not be parsed
fse = fse[(fse.vCode.isin(vSupport)) | (fse.vCode.isna())]

# we use US dollars because some countries use their own currency ('Danish Krone', 'Zloty', 'Swedish Krona')
fse = fse[fse.Measure == "US dollar"][["Country", "Variable", "Value", "Year", "Unit"]]

# total value of the high-risk subsidy categories per country and year
fseRisk = fse[fse.Variable.isin(highRisk)]
fseRisk = fseRisk.groupby(["Country", "Year"]).sum(numeric_only=True)
# negativeValues() is a helper defined earlier in the notebook — presumably it
# reports/handles negative subsidy totals; confirm its definition above
negativeValues(fseRisk)
fse = fse.groupby(["Country", "Year"]).sum(numeric_only=True)
# fishery sustainability high risk subsidies over total subsidies
fseRisk = (fseRisk / fse * 100).pivot_table(
    index="Country", columns="Year", values="Value"
)
# fill missing values with nearest, then backfill and forward fill
fseRisk = (
    fseRisk.interpolate(method="nearest", axis=1, limit_direction="both")
    .bfill(axis=1)
    .ffill(axis=1)
)

# Finland is missing, we use the mean per year
fseRisk.loc["Finland"] = fseRisk.mean()

fseRisk[[2012, 2016, 2020]]
missingCountries(fseRisk)
Year 2012 2016 2020
Country
Belgium 24.616348 12.340143 12.198136
Denmark 40.113722 39.185678 35.047806
Estonia 3.793277 0.000000 7.876993
France 27.141469 0.417263 21.373683
Germany 2.162528 6.760218 2.229222
Ireland 65.070060 3.058643 1.279187
Latvia 7.241777 0.000000 0.000000
Lithuania 18.715209 2.379318 15.524788
Netherlands 2.900523 7.800939 5.064164
Poland 63.477199 100.000000 72.311430
Portugal 24.795666 86.662222 40.377379
Spain 2.744398 9.338087 18.755645
Sweden 39.131343 44.354321 28.298623
United Kingdom 7.358859 6.569589 14.080732
Finland 23.518741 22.776173 19.601271
No missing countries

3. IUU Fishing Index

Source

Code
# IUU Fishing Index indicators selected as most relevant to SDG target 14.6.
# NOTE(review): strings must match the "Indicator name" column of the IUU
# Fishing Index CSV exactly — "Have a NPOA-IUU " ends with a trailing space,
# presumably mirroring the source data; confirm against the raw file before
# "fixing" it.
selectedIUU = [
    "Accepted FAO Compliance Agreement",
    "Mandatory vessel tracking for commercial seagoing fleet",
    "Operate a national VMS/FMC centre",
    "Designated ports specified for entry by foreign vessels",
    "Ratification/accession of UNCLOS Convention",
    "Ratification/accession of UNFSA",
    "Have a NPOA-IUU ",
    "Compliance with RFMO flag state obligations",
    "Compliance with RFMO port state obligations",
]
Code
# IUU Fishing Index: load indicator scores, aggregate the selected indicators
# into one composite per country/year, and rescale so that higher = better.
iuuIndex = pd.read_csv(
    "../data/iuu_fishing_index_2021-data_and_disclaimer/iuu_fishing_index_2019-2021_indicator_scores.csv"
)
iuuIndex = iuuIndex[
    (iuuIndex["Country"].isin(countries))
    & (iuuIndex["Indicator name"].isin(selectedIUU))
]

# fill missing scores with the mean of the other countries for the same
# year/indicator combination
iuuIndex["Score"] = iuuIndex.groupby(
    ["Year", "Indicator name"], sort=False, group_keys=False
)["Score"].apply(lambda x: x.fillna(x.mean()))
iuuIndex = iuuIndex.pivot_table(
    index=["Country", "Year"], columns="Indicator name", values="Score"
)
# equal weights, sigma=10 (high substitutability / weak sustainability)
alpha = 1 / len(iuuIndex.columns)
sigma = 10
compositeIUU = ci.compositeDF(alpha, iuuIndex, sigma)
compositeIUU = pd.DataFrame(compositeIUU, columns=["IUU"])
# max-min transform, inverted (max maps to 0, min to 100) — presumably because
# raw IUU scores are "higher = worse"; confirm with the index methodology
compositeIUU["IUU"] = (
    (compositeIUU["IUU"].max() - compositeIUU["IUU"])
    / (compositeIUU["IUU"].max() - compositeIUU["IUU"].min())
    * 100
).round(2)

compositeIUU = compositeIUU.reset_index().pivot_table(
    index="Country", columns="Year", values="IUU"
)
compositeIUU
missingCountries(compositeIUU)
Year 2019 2021
Country
Belgium 100.00 59.46
Denmark 31.34 62.44
Estonia 70.46 70.46
Finland 36.72 59.46
France 40.02 54.78
Germany 31.34 70.46
Ireland 49.58 70.46
Latvia 92.24 70.46
Lithuania 31.34 38.89
Netherlands 16.32 31.34
Poland 72.91 70.46
Portugal 19.77 69.36
Spain 40.02 23.95
Sweden 70.46 70.46
United Kingdom 19.77 0.00
No missing countries

2. TAC/Catch

Data extracted partially manually.

Source of TAC is stockAssessment[“year”] PDFs.

Source of country catches is /OfficialNominalCatches and stockAssessment[“year”] PDFs.

Code
# TAC/Catch: Total Allowable Catch relative to reported catch, per country
tacCatch = pd.read_csv("../data/ices_data.csv", index_col=0, header=[0, 1])
tacCatch = tacCatch[["TAC/Catch"]] * 100
tacCatch = tacCatch.droplevel(0, axis=1)
# most recent assessment is 2022; use the string "2022" so the label stays
# consistent with the other (string) year headers from the CSV and with the
# SAD/TAC cell, instead of mixing an int label into string-labelled columns
tacCatch.rename(columns={"most recent": "2022"}, inplace=True)
tacCatch = tacCatch[tacCatch.index.isin(countries)]
tacCatch
# maxMin transformation as (x - min)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
# compute the global min/max once instead of three times
tacMin = tacCatch.min().min()
tacMax = tacCatch.max().max()
tacCatch = ((tacCatch - tacMin) / (tacMax - tacMin) * 100).round(2)
tacCatch
missingCountries(tacCatch)
Year 2012 2016 2022
Belgium 78.480130 95.507005 97.255892
Estonia 98.404500 99.435507 96.478689
Finland 98.942568 99.843818 92.122689
France 84.168757 91.279043 97.873527
Germany 97.890177 96.075084 96.254821
Ireland 95.739977 94.352001 97.971417
Latvia 99.389419 99.008097 96.696104
Lithuania 98.155150 98.005229 95.209801
Netherlands 92.986309 95.043900 97.158599
Poland 99.762408 98.797493 94.911954
Portugal 96.065583 97.542063 98.963306
Spain 81.619492 94.504539 98.517713
Sweden 96.705334 98.484897 95.600203
United Kingdom 95.734499 93.625841 98.166726
Denmark 94.385126 96.196738 96.258764
Year 2012 2016 2022
Belgium 0.00 79.70 87.89
Estonia 93.26 98.09 84.25
Finland 95.78 100.00 63.86
France 26.63 59.91 90.78
Germany 90.86 82.36 83.20
Ireland 80.79 74.29 91.24
Latvia 97.87 96.09 85.27
Lithuania 92.10 91.39 78.31
Netherlands 67.90 77.53 87.43
Poland 99.62 95.10 76.91
Portugal 82.31 89.23 95.88
Spain 14.69 75.01 93.79
Sweden 85.31 93.64 80.14
United Kingdom 80.76 70.89 92.15
Denmark 74.45 82.93 83.22
No missing countries

14.7

Sustainable fisheries as a proportion of GDP in small island developing States, least developed countries and all countries

We use two indicators:

  1. ‘Livelihoods & economies’ Index as per Baltic Health Index (BHI)
  2. ‘Tourism’ Index as per BHI

1. ‘Livelihoods & economies’ Index

Divided into:

  1. Economies: Gross Value Added (GVA) annual growth rate of marine sectors against 1.5% target
  2. Livelihoods: Two subindicators: GVA/Hours Worked & FTE Employees/Coastal population

Source Blue Economy

Economies
Code
# Economies indicator, Value added at factor cost - million euro
# https://blue-economy-observatory.ec.europa.eu/blue-economy-indicators_en
gva = pd.read_excel("../data/blueEconomyObservatory/valueAddedFactorCost.xlsx")
gva.rename(
    columns={
        "Value added at factor cost (€ million)": "gva",
        "Member State": "country",
    },
    inplace=True,
)
gva = gva[gva["country"].isin(countries)]
# drop rows with no sector assignment
gva = gva[gva.Sector != "-"]
gvaSector = gva.groupby(["country", "Sector", "Year"]).sum(numeric_only=True)
# year-on-year GVA growth within each country/sector
gvaSector["growthRate"] = gvaSector.groupby(["country", "Sector"])["gva"].pct_change()
# target of 1.5% annual growth rate
# score = growth as a share of the 1.5% target, clipped to the 0-100 range
gvaSector["sectorScore"] = (gvaSector["growthRate"] / 0.015 * 100).clip(
    upper=100, lower=0
)
# weight each sector by its share of the country's total GVA that year
gvaSector["weight"] = gvaSector["gva"] / gvaSector.groupby(["country", "Year"])[
    "gva"
].transform("sum")

gvaSector["weightedScore"] = gvaSector["sectorScore"] * gvaSector["weight"]
# summing weightedScore over sectors gives the GVA-weighted country score
# (the sum also aggregates the other numeric columns, but only gva and
# weightedScore are kept)
gvaEcon = gvaSector.groupby(["country", "Year"]).sum(numeric_only=True)[
    ["gva", "weightedScore"]
]
economies = gvaEcon.reset_index().pivot_table(
    index="country", columns="Year", values="weightedScore"
)
economies[[2012, 2016, 2020]]
missingCountries(economies)
Year 2012 2016 2020
country
Belgium 96.873934 21.658872 21.297335
Denmark 4.504192 22.266304 61.905965
Estonia 52.767271 48.123759 54.687999
Finland 35.661048 73.708100 30.287035
France 32.382788 28.159942 0.247155
Germany 38.803670 64.176248 5.469456
Ireland 87.952005 100.000000 28.923979
Latvia 97.411580 2.196562 19.610579
Lithuania 37.153807 61.513965 42.140579
Netherlands 84.178366 21.389797 40.346288
Poland 62.903198 59.160696 80.944135
Portugal 61.985137 96.467062 9.280987
Spain 3.312837 85.334154 0.000000
Sweden 43.109175 71.468051 14.636507
United Kingdom 100.000000 20.980994 0.000000
No missing countries
Livelihoods
Code
# Livelihoods - Quality
# Hours Worked https://blue-economy-observatory.ec.europa.eu/blue-economy-indicators_en
# Select Insight Advisor, search in Asset for the measure and select Dimensions: year, member state

# hoursWorked = pd.read_excel('../data/blueEconomyObservatory/hoursWorked.xlsx')
# hoursWorked.rename(columns={'Member State':'country_name','Year':'year','Hours worked by employees':'hoursWorked'}, inplace=True)
# gvaHoursW = hoursWorked.merge(gvaEcon.reset_index(), on=['country_name','year'], how='left')
# gvaHoursW['gvaHoursW'] = gvaHoursW['value'] / gvaHoursW['hoursWorked'] * 1_000_000
# gvaHoursW = gvaHoursW[['country_name','year','gvaHoursW']].set_index(['country_name','year'])
# gvaHoursW.pivot_table(index='country_name', columns='year', values='gvaHoursW')

# Above, I calculated the GVA per hour worked ratio using the raw data of each variable.
# The result is different from the ratio variable provided by the Blue Economy Observatory. I emailed them to ask for clarification.

gvaHoursW = pd.read_excel(
    "../data/blueEconomyObservatory/GVAperHourWorked.xlsx"
).set_index(["Year", "Member State"])
gvaHoursW.rename(
    columns={"Gross value added per hour worked by employees": "gvaHoursW"},
    inplace=True,
)
# non-numeric entries (placeholders) become NaN
gvaHoursW["gvaHoursW"] = gvaHoursW["gvaHoursW"].apply(pd.to_numeric, errors="coerce")
# Comparative price levels https://ec.europa.eu/eurostat/databrowser/view/TEC00120/default/table?lang=en
pppEurostat = pd.read_csv("../data/pppEurostat.csv")
pppEurostat.geo = pppEurostat.geo.replace(abbrev_to_country)
pppEurostat = (
    pppEurostat[pppEurostat.geo.isin(countries)]
    .rename(columns={"geo": "Member State", "TIME_PERIOD": "Year", "OBS_VALUE": "ppp"})[
        ["ppp", "Member State", "Year"]
    ]
    .set_index(["Member State", "Year"])
)
# NOTE(review): gvaHoursW is indexed (Year, Member State) while pppEurostat is
# (Member State, Year) — this merge must align on level names, not level
# order; confirm the resulting row count matches expectations
gvaHoursW = gvaHoursW.merge(pppEurostat, left_index=True, right_index=True)
# purchasing-power-adjusted productivity; NOTE(review): computed but not used
# below in this cell — the score is based on the unadjusted gvaHoursW
gvaHoursW["gvaHoursWppp"] = gvaHoursW["gvaHoursW"] / gvaHoursW["ppp"] * 100
gvaHoursW = gvaHoursW[gvaHoursW.index.get_level_values("Year").isin(range(2012, 2023))]
# max-min rescaling to 0-100 over all countries and years
gvaHoursW["gvaHoursWscore"] = (
    (gvaHoursW.gvaHoursW - gvaHoursW.gvaHoursW.min())
    / (gvaHoursW.gvaHoursW.max() - gvaHoursW.gvaHoursW.min())
    * 100
).round(2)

# Denmark and France are missing.
# We fill them with the mean of the other countries by year
print(
    "Missing countries: \n",
    gvaHoursW[gvaHoursW.gvaHoursW.isna()]
    .index.get_level_values("Member State")
    .unique()
    .to_list(),
)
gvaHoursW["gvaHoursWscore"] = gvaHoursW.groupby("Year")["gvaHoursWscore"].transform(
    lambda x: x.fillna(x.mean())
)
Missing countries: 
 ['Denmark', 'France']
Code
# Livelihoods - Quantity
# FTE employees https://blue-economy-observatory.ec.europa.eu/blue-economy-indicators_en

fteEmployment = pd.read_excel("../data/blueEconomyObservatory/employmentFTE.xlsx")
fteEmployment.rename(
    columns={
        "Member State": "country_name",
        "Year": "year",
        "Employees in full time equivalent units": "fteEmployees",
    },
    inplace=True,
)
# Coastal areas https://ec.europa.eu/eurostat/web/coastal-island-outermost-regions/methodology
coastArea = pd.read_excel("../data/NUTS2021Coastal.xlsx", sheet_name="Coastal regions")[
    ["NUTS_ID", "COASTAL CATEGORY"]
]
coastArea.rename(
    columns={"NUTS_ID": "geo", "COASTAL CATEGORY": "coastal"}, inplace=True
)
# Population https://ec.europa.eu/eurostat/databrowser/view/DEMO_R_PJANAGGR3__custom_7060174/default/table?lang=en
population = pd.read_csv("../data/demoNUTS3.csv")

coastalPop = coastArea.merge(population, on="geo", how="left")
# France and UK have three letter abbreviation codes
# country code = first two letters of the NUTS code; NOTE(review): assumes
# every 2-letter prefix still maps correctly in abbrev_to_country — verify
# the FR*/UK* rows
coastalPop["country"] = coastalPop.geo.str.extract(r"([a-zA-Z]{2})").replace(
    abbrev_to_country
)
coastalPop = coastalPop[coastalPop.country.isin(countries)]
coastalPop = coastalPop.dropna(subset=["TIME_PERIOD"])
coastalPop["TIME_PERIOD"] = coastalPop["TIME_PERIOD"].astype(int)
# aggregate NUTS3 population up to country/year/coastal-category level
coastalPop = (
    coastalPop.groupby(
        [
            "country",
            "TIME_PERIOD",
            "coastal",
        ]
    )
    .sum(numeric_only=True)
    .reset_index()
)
# 0 non-coastal, 1  coastal (on coast), 2 coastal (>= 50% of population living within 50km of the coastline)
coastalPop = (
    coastalPop[coastalPop["coastal"].isin([1, 2])]
    .groupby(["country", "TIME_PERIOD"])
    .sum()
)
fteCoastalPop = coastalPop.merge(
    fteEmployment,
    left_on=["country", "TIME_PERIOD"],
    right_on=["country_name", "year"],
    how="inner",
)
# blue-economy FTE jobs per coastal inhabitant
# (OBS_VALUE: summed population of the coastal NUTS regions)
fteCoastalPop["fteCoastalPop"] = (
    fteCoastalPop["fteEmployees"] / fteCoastalPop["OBS_VALUE"]
)
# max-min rescaling to 0-100 over all countries and years
fteCoastalPop["fteCoastalPopScore"] = (
    (fteCoastalPop.fteCoastalPop - fteCoastalPop.fteCoastalPop.min())
    / (fteCoastalPop.fteCoastalPop.max() - fteCoastalPop.fteCoastalPop.min())
    * 100
).round(2)
Code
# Livelihoods - Quantitative + Qualitative

# combine the quality (GVA/hour) and quantity (FTE per coastal inhabitant)
# scores into one table indexed by (Member State, Year)
livelihoods = (
    (
        gvaHoursW[["gvaHoursWscore"]]
        .reset_index()
        .merge(
            fteCoastalPop[["fteCoastalPopScore", "country_name", "year"]],
            left_on=["Member State", "Year"],
            right_on=["country_name", "year"],
            how="outer",
        )
        .groupby(["Member State"], group_keys=False)
    )
    # we use 2019 value for UK
    .apply(lambda x: x.ffill()).set_index(["Member State", "Year"])[
        ["gvaHoursWscore", "fteCoastalPopScore"]
    ]
)

# aggregate the two sub-scores with equal weights and sigma=10
# (high substitutability / weak sustainability)
sigma = 10
alpha = 1 / len(livelihoods.columns)
livelihoodsAgg = pd.DataFrame(
    ci.compositeDF(alpha=alpha, sigma=sigma, df=livelihoods),
    columns=["livelihoodsScore"],
)
livelihoodsAgg = livelihoodsAgg.pivot_table(
    index="Member State", columns="Year", values="livelihoodsScore"
)
livelihoodsAgg[[2012, 2016, 2020]]
missingCountries(livelihoodsAgg)
Year 2012 2016 2020
Member State
Belgium 30.281663 28.891296 32.771698
Denmark 19.324921 17.992665 18.660038
Estonia 38.648069 33.229465 24.106390
Finland 17.603990 16.970304 14.834441
France 19.335492 16.448000 16.373689
Germany 40.660083 47.184236 50.615666
Ireland 8.388481 11.608541 12.027315
Latvia 9.416146 11.863535 11.152544
Lithuania 36.846089 41.621058 49.893982
Netherlands 57.244435 31.071857 24.431804
Poland 19.193763 18.547868 20.915277
Portugal 8.728264 10.711938 10.882364
Spain 17.861111 17.688389 15.197841
Sweden 16.095669 18.499152 18.619556
United Kingdom 40.604179 25.811786 31.662945
No missing countries

2. Tourism: GVA/nights spent

Code
# Tourism: GVA/nights spent
# Nights spent at tourist accommodation establishments in coastal areas
# https://ec.europa.eu/eurostat/databrowser/view/TOUR_OCC_NIN2DC__custom_7065857/default/table?lang=en

nightCoast = pd.read_csv("../data/nightsAccomoCoastal.csv")
nightCoast.geo = nightCoast.geo.replace(abbrev_to_country)
nightCoast = nightCoast[nightCoast.geo.isin(countries)]
# tourism GVA proxied by the "Accommodation" sub-sector of the blue economy
gvaAccomodation = gva[gva["Sub-sector"] == "Accommodation"]
gvaNights = gvaAccomodation.reset_index().merge(
    nightCoast,
    left_on=["country", "Year"],
    right_on=["geo", "TIME_PERIOD"],
    how="inner",
)
# GVA (converted from million EUR) per night spent (OBS_VALUE = nights spent)
gvaNights["gvaNights"] = gvaNights["gva"] * 1_000_000 / gvaNights["OBS_VALUE"]
gvaNights = gvaNights.set_index(["country", "Year"])[["gvaNights"]]
# max-min rescaling to 0-100 over all countries and years
gvaNights["gvaNightsScore"] = (
    (gvaNights.gvaNights - gvaNights.gvaNights.min())
    / (gvaNights.gvaNights.max() - gvaNights.gvaNights.min())
    * 100
).round(2)
gvaNights = gvaNights.pivot_table(
    index="country", columns="Year", values="gvaNightsScore"
)
gvaNights[[2012, 2016, 2020]]
missingCountries(gvaNights)
Year 2012 2016 2020
country
Belgium 13.73 8.06 10.77
Denmark 42.24 51.11 57.19
Estonia 24.09 23.99 37.59
Finland 29.40 40.76 21.99
France 15.10 15.04 11.40
Germany 33.49 34.18 44.95
Ireland NaN 44.73 59.24
Latvia 8.64 20.29 15.65
Lithuania 4.75 19.10 15.98
Netherlands 9.31 10.06 3.07
Poland 25.21 10.41 0.00
Portugal 20.24 23.19 31.04
Spain 20.39 22.30 13.35
Sweden 26.73 37.43 36.42
United Kingdom 24.64 9.21 NaN
No missing countries

14.a

Proportion of total research budget allocated to research in the field of marine technology

We use two indicators:

  1. Official UNSD indicator ER_RDE_OSEX: Max-Min transformation where the max-value and min-value are the maximum and minimum in the assessment period (2009-2017) of all European countries (not restricted to the countries in the analysis).
  2. SAD/TAC: Catch-weighted TAC relative to Scientific Advice

Note: ER_RDE_OSEX data comes from Global Ocean Science Report (GOSR) 2020, and goes from 2013 to 2017. Data for 2009-2012 is available in the UNstats archive (download csv for 29-Mar-19)

1. Ocean science expenditure ER_RDE_OSEX

Several sources

Code
# %%time
# National ocean science expenditure as a share of total research and development funding (%), UNstats archive (years 2009-2012)
# https://unstats.un.org/sdgs/indicators/database/archive

# # read old data by chunks to speed loading and save 14.a.1 to separate file
# iter_csv = pd.read_csv('./data/AllData_Onward_20190329.csv', iterator=True, chunksize=1000, encoding='cp1252')
# oResearchOld = pd.concat([chunk[chunk.Indicator == '14.a.1'] for chunk in iter_csv])
# oResearchOld.to_csv('./data/archive14a1.csv', index=False)

# archived 14.a.1 series, reshaped to countries (rows) x years (columns)
oResearchOld = pd.read_csv("../data/archive14a1.csv").pivot(
    index="GeoAreaName", columns="TimePeriod", values="Value"
)
Code
# National ocean science expenditure as a share of total research and development funding (%), UNstats
# https://unstats.un.org/sdgs/dataportal/database

# read official data and merge with archive data
oResearch = sdg14[sdg14["SeriesCode"] == "ER_RDE_OSEX"].pivot_table(
    columns="TimePeriod", index="GeoAreaName", values="Value", aggfunc="mean"
)
# outer merge: year columns present in both frames get the default
# _x (archive) / _y (official) suffixes
oResearch = oResearchOld.merge(
    oResearch, left_index=True, right_index=True, how="outer"
)
# use all countries in Europe
oResearch = oResearch[oResearch.index.isin(allEurope)].dropna(how="all", axis=1)
# fill nan of year 2013 from new report with old report and 2016 with 2017
oResearch[2013] = oResearch["2013_y"].fillna(oResearch["2013_x"])
# oResearch[2016] = oResearch[2016].fillna(oResearch[2017])
oResearch = oResearch[list(range(2013, 2018))]
# weighted by EEZ area
# NOTE(review): this rescales each country's value by its share of the total
# EEZ area (value * area / sum(area)); it is a weighting of contributions,
# not a weighted average — confirm this is the intended transformation
oResearch = oResearch.merge(eez, left_index=True, right_index=True, how="outer")
for col in oResearch.drop("Area_km2", axis=1).columns:
    oResearch[col] = (
        oResearch[col] * oResearch["Area_km2"] / (oResearch["Area_km2"].sum())
    )
# maxMin transformation as (x - min)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
oResearch = (
    (oResearch.loc[:, 2013:2017] - oResearch.loc[:, 2013:2017].min().min())
    / (
        oResearch.loc[:, 2013:2017].max().max()
        - oResearch.loc[:, 2013:2017].min().min()
    )
    * 100
).round(2)
# fill nan with mean of column
for col in oResearch.columns:
    oResearch[col] = oResearch[col].fillna(oResearch[col].mean())

oResearch = oResearch[oResearch.index.isin(countries)]
oResearch[[2013, 2016, 2017]]

missingCountries(oResearch)
2013 2016 2017
Belgium 0.01000 0.00 0.000000
Denmark 3.19375 14.67 8.182222
Estonia 3.19375 14.67 8.182222
Finland 0.57000 0.58 0.570000
France 6.30000 14.67 4.200000
Germany 0.40000 0.31 0.300000
Ireland 3.19375 14.67 47.360000
Latvia 3.19375 14.67 8.182222
Lithuania 3.19375 14.67 8.182222
Netherlands 0.35000 0.29 0.330000
Poland 0.10000 0.06 0.060000
Portugal 3.19375 95.47 8.182222
Spain 6.58000 11.48 10.500000
Sweden 3.19375 14.67 8.182222
United Kingdom 11.24000 9.17 10.320000
No missing countries

2. SAD/TAC

Data extracted partially manually.

Source of TAC and SAD is stockAssessment[“year”] PDFs.

Source of country catches is /OfficialNominalCatches and stockAssessment[“year”] PDFs.

If a country fishes only on fish stocks where assignment of TAC follows scientific advice, it would score 100

Code
# SAD/TAC: Total Allowable Catch relative to Scientific Advice, per country
sadTac = pd.read_csv("../data/ices_data.csv", index_col=0, header=[0, 1])
sadTac = sadTac[["SAD/TAC"]] * 100
sadTac = sadTac.droplevel(0, axis=1)
# most recent assessment is 2022
sadTac.rename(columns={"most recent": "2022"}, inplace=True)
sadTac = sadTac[sadTac.index.isin(countries)]
sadTac
# maxMin transformation as (x - min)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
# compute the global min/max once instead of three times
sadMin = sadTac.min().min()
sadMax = sadTac.max().max()
sadTac = ((sadTac - sadMin) / (sadMax - sadMin) * 100).round(2)
sadTac
missingCountries(sadTac)
Year 2012 2016 2022
Belgium 98.292746 97.071052 96.615720
Estonia 92.753209 86.254153 90.900148
Finland 99.440656 97.749144 95.619503
France 90.323954 91.459570 92.379607
Germany 90.655052 85.395915 87.737917
Ireland 98.126385 87.353687 75.753058
Latvia 91.715604 82.541315 91.324412
Lithuania 96.064711 84.378674 88.919591
Netherlands 95.566376 88.761011 89.214834
Poland 94.916995 80.769729 86.833090
Portugal 98.862330 94.596274 91.951678
Spain 91.262037 84.589603 88.343857
Sweden 90.155366 89.159207 87.337151
United Kingdom 79.451505 81.326929 82.954885
Denmark 84.660093 77.694543 84.036427
Year 2012 2016 2022
Belgium 95.15 90.00 88.07
Estonia 71.77 44.33 63.95
Finland 100.00 92.86 83.87
France 61.51 66.31 70.19
Germany 62.91 40.71 50.60
Ireland 94.45 48.97 0.00
Latvia 67.39 28.66 65.74
Lithuania 85.75 36.41 55.58
Netherlands 83.64 54.91 56.83
Poland 80.90 21.18 46.78
Portugal 97.56 79.55 68.38
Spain 65.47 37.30 53.15
Sweden 60.80 56.60 48.90
United Kingdom 15.61 23.53 30.40
Denmark 37.60 8.20 34.97
No missing countries

14.b

Degree of application of a legal/regulatory/policy/institutional framework which recognizes and protects access rights for small‐scale fisheries

We use two indicators:

  1. OHI Artisanal Fishing Opportunities Index: No further transformation
  2. Percentage of Fish Species Threatened: No further transformation

1. OHI ‘Artisanal opportunities’ Index

Source

Code
# OHI 'Artisanal opportunities' Index
# https://oceanhealthindex.org/global-scores/data-download/

ohiArt = pd.read_csv("../data/scoresOHI.csv")
# restrict to the study countries, the artisanal-opportunities goal, and the
# "status" dimension with a single boolean mask
keepRows = (
    ohiArt["region_name"].isin(countries)
    & (ohiArt.long_goal == "Artisanal opportunities")
    & (ohiArt.dimension == "status")
)
ohiArt = ohiArt[keepRows].pivot_table(
    index="region_name", columns="scenario", values="value", aggfunc="mean"
)

ohiArt[[2012, 2018, 2022]]

missingCountries(ohiArt)
scenario 2012 2018 2022
region_name
Belgium 79.51 77.65 78.66
Denmark 70.50 77.24 74.63
Estonia 89.39 91.18 99.24
Finland 84.67 82.73 77.45
France 77.09 76.44 78.61
Germany 73.21 76.62 73.23
Ireland 69.90 73.46 73.52
Latvia 87.40 89.29 99.01
Lithuania 88.71 90.62 97.63
Netherlands 55.01 57.44 60.35
Poland 87.82 76.21 77.61
Portugal 77.81 65.46 66.08
Spain 73.45 70.66 73.08
Sweden 94.77 95.73 97.37
United Kingdom 81.35 82.74 79.79
No missing countries

2. Percentage of Fish Species Threatened

Source Analysis done in iucnRedList.ipynb

Code
# Percentage of Fish Species Threatened
# Time series comparison is tricky: https://www.iucnredlist.org/assessment/red-list-index

threatenedFish = pd.read_csv("../data/threatenedFishIUCN.csv").pivot_table(
    index="name", columns="year", values="threatenedScore"
)
threatenedFish

missingCountries(threatenedFish)
year 2012 2016 2022
name
Belgium 77.777778 87.786260 84.967320
Denmark 83.516484 89.944134 86.138614
Estonia 80.952381 89.473684 90.243902
Finland 81.818182 89.189189 90.000000
France 88.135593 93.216080 91.332611
Germany 77.611940 87.022901 82.894737
Ireland 86.592179 93.750000 91.885965
Latvia 77.272727 87.804878 88.636364
Lithuania 77.272727 87.500000 88.372093
Netherlands 83.132530 91.489362 89.047619
Poland 76.923077 87.234043 86.274510
Portugal 89.573460 94.731065 92.578125
Spain 89.938398 94.646465 92.513863
Sweden 77.272727 87.121212 84.027778
United Kingdom 86.729858 93.827160 91.337100
No missing countries

14.c

Number of countries making progress in ratifying, accepting and implementing through legal, policy and institutional frameworks, ocean-related instruments that implement international law, as reflected in the United Nations Convention on the Law of the Sea

We use two indicators:

  1. Participation in agreements of the International Maritime Organization (IMO Participation Rate)
  2. Mining Area/EEZ (Deep Sea Mining)

1. Participation in agreements of the International Maritime Organization

Source

Code
# dictionary for country codes used by the GISIS
with open("../data/gisisDict.csv") as fh:
    raw_pairs = dict(csv.reader(fh))

# swap keys and values so lookups go country name -> GISIS code
gisisDict = {val: key for key, val in raw_pairs.items()}
Code
# Participation in agreements of the International Maritime Organization (IMO)

# https://www.imo.org/en/About/Conventions/Pages/StatusOfConventions.aspx Excel file with current status of IMO conventions
# We get the historical data from the GISIS database: https://gisis.imo.org/Public/ST/Ratification.aspx
# You need to create account to access data.

# Scraping with Selenium/bs4 failed, so the ratification pages were saved to
# HTML manually; listIMO holds one GISIS ratification URL per study country.

gisisCountries = {name: cid for name, cid in gisisDict.items() if name in countries}
listIMO = [
    "https://gisis.imo.org/Public/ST/Ratification.aspx?cid=" + str(cid)
    for cid in gisisCountries.values()
]

# for link in listIMO:
#     webbrowser.open(link)
Code
# Count, per country, how many IMO treaties were in force in 2012/2016/2021,
# based on the manually-saved GISIS "Ratification of Treaties" HTML pages.
imoRatedf = pd.DataFrame(columns=["Country", 2012, 2016, 2021])

# loop thru the html files in the folder and extract the data
for i in range(len(os.listdir("../data/treatiesIMO/"))):
    countryIMO = np.nan
    # table index 4 of the saved page holds the ratification-status table
    imoRate = pd.read_html(
        "../data/treatiesIMO/Status of Treaties » Ratification of Treaties{}.html".format(
            i
        )
    )[4]
    # get the country name from the html file name by checking if string is in the list of countries
    for country in countries:
        if country in imoRate["Treaty"][0]:
            countryIMO = country
    # identity check against the NaN singleton: True only when a country matched
    if countryIMO is not np.nan:
        # second row holds the real header; drop the two header rows
        imoRate.columns = imoRate.iloc[1]
        imoRate = imoRate[2:]
        # new columns with the year of accession and denounced
        imoRate["accession"] = (
            imoRate["Date of entry into force in country"]
            .str.extract("^([^(]+)")
            .fillna("")
        )
        imoRate["denounced"] = imoRate[
            "Date of entry into force in country"
        ].str.extract(".*\\:(.*)\\).*")
        imoRate[["accession", "denounced"]] = (
            imoRate[["accession", "denounced"]]
            .apply(pd.DatetimeIndex)
            .apply(lambda x: x.dt.year)
        )
        # count the number of treaties each country accessioned and didn't denounced by 2012, 2016 and 2021
        # NOTE: the inner loop reuses the name `i`; harmless because the outer
        # `for` rebinds `i` from its iterator on the next pass, but `i` must
        # not be used below this point within an iteration
        for i in (2012, 2016, 2021):
            imoRate[str(i)] = np.where(
                (imoRate.accession < i)
                & ((imoRate.denounced > i) | (imoRate.denounced.isna())),
                1,
                0,
            )
        imoCount = (
            countryIMO,
            imoRate["2012"].sum(),
            imoRate["2016"].sum(),
            imoRate["2021"].sum(),
        )
        imoRatedf.loc[len(imoRatedf), imoRatedf.columns] = imoCount
# calculate total possible treaties, apply dif-ref and convert to percentage
# NOTE(review): totalIMO is derived from whichever file the loop parsed last —
# this assumes every saved page lists the same full treaty set; confirm
totalIMO = len(imoRate.dropna(subset=["Date of entry into force in country"]))
imoRatedf = imoRatedf.set_index("Country").sort_index()
imoRatedf = 100 - 100 * (totalIMO - imoRatedf).divide(totalIMO)
imoRatedf = imoRatedf.astype(np.float64)
imoRatedf = imoRatedf.apply(pd.to_numeric)
imoRatedf

missingCountries(imoRatedf)
2012 2016 2021
Country
Belgium 78.431373 76.470588 90.196078
Denmark 80.392157 86.274510 92.156863
Estonia 76.470588 78.431373 82.352941
Finland 76.470588 78.431373 90.196078
France 84.313725 84.313725 96.078431
Germany 80.392157 82.352941 88.235294
Ireland 66.666667 68.627451 68.627451
Latvia 80.392157 80.392157 82.352941
Lithuania 58.823529 62.745098 64.705882
Netherlands 84.313725 86.274510 92.156863
Poland 80.392157 84.313725 86.274510
Portugal 68.627451 76.470588 86.274510
Spain 86.274510 86.274510 88.235294
Sweden 82.352941 90.196078 94.117647
United Kingdom 78.431373 78.431373 78.431373
No missing countries

2. Mining Area/EEZ

Source

Code
# Deep Sea Mining, Mining Area/EEZ

seaMining = pd.read_excel("../data/deepSeaMining.xlsx", sheet_name="Data")
# keep only the 2023 score, indexed by country; the former
# `seaMining["Year"] = 2023` assignment was dead code (the column was dropped
# immediately by the selection below) and has been removed
seaMining.rename(columns={"Score 2023": "2023"}, inplace=True)
seaMining = seaMining.set_index(["Country"])[["2023"]]
seaMining
missingCountries(seaMining)
2023
Country
Belgium 0
Germany 100
Denmark 100
Estonia 100
Spain 100
Finland 100
France 100
Ireland 100
Lithuania 100
Latvia 100
Netherlands 100
Poland 97
Portugal 100
Sweden 100
United Kingdom 99
No missing countries

Indicators aggregation

Given our ratio-scale, fully comparable indicators $I_{it}$, a meaningful aggregation of $N$ indicators into a composite indicator $CI_t$ is obtained according to social choice theory by applying a generalized mean:

$$CI_t(\alpha_{it}, I_{it}, \sigma) = \left( \sum_{i=1}^{N} \alpha_{it}\, I_{it}^{\frac{\sigma-1}{\sigma}} \right)^{\frac{\sigma}{\sigma-1}} \quad \text{for} \quad t = 2012, 2018, 2021 \ \text{(or most recent)}$$

with weights $\alpha_{it} > 0$ and $0 \le \sigma \le \infty$. The parameter $\sigma$ is used to quantify the elasticity of substitution between the different indicators. High (low) values of $\sigma$ imply good (poor) substitution possibilities, which means that a high score in one indicator can (cannot) compensate a low score in another indicator. Consequently, high and low values of $\sigma$ correspond to concepts of weak and strong sustainability, respectively. Depending on $\sigma$, one can obtain a full class of specific functional forms for the composite indicator.

We define:

$\sigma_{Target} = 0.5$ and $\sigma_{Target} = 10$

Code
# Every indicator frame, keyed by the variable name used in the target
# definitions below.  varNames / varDf are kept as parallel views of this
# mapping for any later code that relies on them.
dictIndicators = {
    "nitro": nitro,
    "eutro": eutro,
    "wasteG": wasteG,
    "wasteR": wasteR,
    "mspVal": mspVal,
    "ohiBio": ohiBio,
    "co2pc": co2pc,
    "phScore": phScore,
    "fmsyF": fmsyF,
    "bBmsy": bBmsy,
    "kbaMPA": kbaMPA,
    "natura2kEEZ": natura2kEEZ,
    "fseRisk": fseRisk,
    "tacCatch": tacCatch,
    "compositeIUU": compositeIUU,
    "livelihoodsAgg": livelihoodsAgg,
    "economies": economies,
    "gvaNights": gvaNights,
    "oResearch": oResearch,
    "sadTac": sadTac,
    "ohiArt": ohiArt,
    "threatenedFish": threatenedFish,
    "imoRatedf": imoRatedf,
    "seaMining": seaMining,
}
varNames = list(dictIndicators)
varDf = list(dictIndicators.values())
# stack variables in each dataframe
# i.e. reshape each indicator into a single-column frame indexed by
# (Country, Year)
for name, df in dictIndicators.items():
    df = df.stack().to_frame().rename(columns={0: str(name)})
    df.index.names = ["Country", "Year"]
    df.reset_index(inplace=True)
    # year labels arrive as ints or strings depending on the source frame
    df.Year = df.Year.astype(int)
    df.set_index(["Country", "Year"], inplace=True)
    dictIndicators[name] = df
# merge all variables into one dataframe, forward and back fill by country
indicators = pd.concat(dictIndicators.values(), axis=1, join="outer")
indicators = indicators.reset_index().sort_values(["Country", "Year"])
indicators = indicators[indicators.Year.isin(list(range(2012, 2022)))]
# within each country, fill missing years from the nearest observed year
# (the ffill/bfill relies on the Year sort above)
indicators = indicators.groupby(["Country"], group_keys=False).apply(
    lambda x: x.ffill().bfill()
)
indicators = indicators.set_index(["Country", "Year"])
# component indicators for each SDG 14 target; "plastic" is an intermediate
# composite consumed by target 14.1, so dict insertion order matters — it is
# computed first by the aggregation loop below
indicatorsL = {
    "plastic": ["wasteG", "wasteR"],
    "14.1": ["nitro", "eutro", "plastic"],
    "14.2": ["mspVal", "ohiBio"],
    "14.3": ["co2pc", "phScore"],  # dropped "scoreESD"
    "14.4": ["fmsyF", "bBmsy"],
    "14.5": ["kbaMPA", "natura2kEEZ"],
    "14.6": ["fseRisk", "tacCatch", "compositeIUU"],
    "14.7": ["livelihoodsAgg", "economies", "gvaNights"],
    "14.a": ["oResearch", "sadTac"],
    "14.b": ["ohiArt", "threatenedFish"],
    "14.c": ["imoRatedf", "seaMining"],
}

# Aggregate each target's component indicators into one composite column
# using the equal-weight generalized mean from composite.py.
targets = indicators.copy()
for target, indicator in indicatorsL.items():
    sigma = 10
    alpha = 1 / len(indicator)
    try:
        df = targets[indicator]
    except KeyError:
        print("Missing indicator for", target)
        continue
    targets[target] = ci.compositeDF(alpha, df, sigma)
# keep only the per-target composite columns (named "14.x")
targets = targets[[col for col in targets if col.startswith("14")]]

Monte Carlo simulation

Code source is in the composite.py file

Code
%%time
# calculate composite score for each target importing the function from composite.py
# Monte Carlo over weights for strong sustainability; a single equal-weight,
# sigma=10 aggregation for weak sustainability
scoresStrong = ci.compositeMC(df = targets, years=[2012, 2016, 2021], simulations=10_000)
scoresWeak = pd.DataFrame(ci.compositeDF(alpha = 1 / len(targets.columns), df = targets, sigma = 10), columns=['scoreWeak'])
# keep only the study countries
scoresStrong = scoresStrong[scoresStrong.index.get_level_values('Country').isin(countries)]
CPU times: user 18.9 s, sys: 8.42 ms, total: 18.9 s
Wall time: 18.9 s

Plots

Code
InteractiveShell.ast_node_interactivity = "last_expr"  # last_expr
Code
# merge all data and calculate the EEZ-area-weighted average for indicators
# and targets

data_frames = [indicators, targets, scoresStrong, scoresWeak]
# inner-join all frames on their (Country, Year) index, left to right
fullData = data_frames[0]
for frame in data_frames[1:]:
    fullData = fullData.merge(frame, left_index=True, right_index=True, how="inner")

# long format: one row per (Country, Year, indicator), joined with EEZ areas
eezAverage = (
    fullData.stack()
    .reset_index()
    .merge(eez, on="Country", how="left")
    .rename(columns={0: "value", "level_2": "indicator"})
)

eezAverage["valueEEZ"] = eezAverage.value * eezAverage.Area_km2
eezAverage = (
    eezAverage.groupby(["Year", "indicator"])
    .sum(numeric_only=True)
    .drop("value", axis=1)
)
# EEZ-weighted mean = sum(value * area) / sum(area)
eezAverage["averageEEZ"] = eezAverage.valueEEZ / eezAverage.Area_km2
Code
# define function to plot boxplots


def plotBox(
    df,
    xaxis_title="",
    yaxis_title="",
    xlegend=0.0,
    ylegend=0.0,
    maxShift=0.0,
    minShift=0.0,
):
    """Box plot per indicator, annotated with extreme countries and EEZ averages.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data with columns "indicator", "value", "Country".
    xaxis_title, yaxis_title : str
        Axis labels. (The original defaults were the *type objects* ``str`` /
        ``float``; replaced with sensible empty defaults — callers in this
        file always pass values, so behavior is unchanged.)
    xlegend, ylegend : float
        Legend position in paper coordinates.
    maxShift, minShift : float
        Vertical pixel shift for the max/min annotation labels.

    Side effects: reads the module-level ``eezAverage`` table and shows the
    figure; returns nothing.
    """

    def _extreme_annotations(pick, shift):
        # One annotation per indicator at its extreme value (pick = np.max or
        # np.min). If more than 4 countries share the extreme, the label is
        # collapsed to "*" and the full list kept in the returned dict.
        annos, extremeD = [], {}
        for x, s in enumerate(df.indicator.unique()):
            extremeVal = pick(df[df["indicator"] == s]["value"])
            names = df.loc[
                (df["value"] == extremeVal) & (df["indicator"] == s), "Country"
            ].values
            if len(names) > 4:
                extremeD[s] = names
                names = "*"
            annos.append(
                dict(
                    x=x,
                    y=extremeVal,
                    text=",<br>".join(names),
                    yshift=shift,
                    showarrow=False,
                )
            )
        return annos, extremeD

    fig = px.box(df, x="indicator", y="value")

    # countries tied at the extremes (dicts kept for interactive inspection)
    maxAnnos, maxCountriesD = _extreme_annotations(np.max, maxShift)
    minAnnos, minCountriesD = _extreme_annotations(np.min, minShift)
    annoList = maxAnnos + minAnnos

    # add the 2021 EEZ-weighted average of each indicator as a black marker
    indicatorAverage = eezAverage.loc[(2021, df.indicator.unique()), :].reset_index()
    for s in indicatorAverage.indicator.unique():
        # .iloc[0] instead of float(Series): calling float() on a Series is
        # deprecated and raises in pandas >= 2.x
        averageVal = indicatorAverage.loc[
            indicatorAverage["indicator"] == s, "averageEEZ"
        ].iloc[0]
        fig.add_scatter(
            x=[s],
            y=[averageVal],
            mode="markers",
            legendgroup="EEZ average",
            name="EEZ average",
            marker=dict(color="black", size=6),
        )

    fig.update_layout(annotations=annoList)

    # hide per-trace legend entries, then add one dummy trace (re-using the
    # last s/averageVal) so the legend shows a single "EEZ average" entry
    fig.update_traces(showlegend=False).add_scatter(
        x=[s],
        y=[averageVal],
        mode="markers",
        legendgroup="EEZ average",
        name="EEZ average",
        marker=dict(color="black", size=6),
    )

    # legend position and axis titles
    fig.update_layout(
        legend=dict(
            x=xlegend,
            y=ylegend,
            traceorder="normal",
            font=dict(family="sans-serif", size=12, color="black"),
            bgcolor="White",
        ),
        xaxis_title=xaxis_title,
        yaxis_title=yaxis_title,
        # yaxis_range=[-20,100]
    )

    fig.show()
Code
# Plot min, max, and EEZ-weighted average for indicators and for targets.
# Both views come from the same long-format 2021 slice of fullData.

year2021 = fullData[fullData.index.isin([2021], level=1)]
longData = (
    year2021.stack()
    .reset_index()
    .rename(columns={0: "value", "level_2": "indicator"})
)

# indicators view: country names shortened to their abbreviations
indicatorsBox = longData[longData.indicator.isin(varNames)].copy()
indicatorsBox.Country = indicatorsBox.Country.map(country_to_abbrev)

# targets view: rows whose indicator is one of the 14.x targets
targetsBox = longData[longData.indicator.isin(list(indicatorsL.keys()))]

plotBox(
    indicatorsBox,
    "Indicators",
    "Percentage",
    xlegend=0.85,
    ylegend=0.2,
    maxShift=30,
    minShift=-10,
)
plotBox(
    targetsBox, "Targets", "Percentage", xlegend=0, ylegend=1, maxShift=10, minShift=-10
)
Code
# Compare strong- and weak-sustainability rankings, one scatter per year.
df = fullData.reset_index()

# scores -> within-year rankings (1 = best); rankStd rescales the score
# standard deviation to ranking units
df["rankMean"] = df.groupby("Year")["scoreMean"].rank(ascending=False)
df["rankStd"] = df["scoreStd"] * len(df["Country"].unique()) / 100
df["rankWeak"] = df.groupby("Year")["scoreWeak"].rank(ascending=False)

nCountries = len(df["Country"].unique())
for year in [2012, 2016, 2021]:
    yearly = df[df.Year == year]
    fig = px.scatter(
        yearly,
        x="rankWeak",
        y="rankMean",
        text="Country",
        title=str(year),
        error_y=np.array(yearly["rankStd"]),
    )
    fig.update_traces(
        textposition="bottom right",
        marker=dict(color="black", size=0),
        error_y=dict(width=0),
    )
    # 45-degree reference line drawn in paper coordinates, below the data
    fig.update_layout(
        shapes=[
            {
                "type": "line",
                "yref": "paper",
                "xref": "paper",
                "y0": 0,
                "y1": 1,
                "x0": 0,
                "x1": 1,
                "line": {"color": "grey", "width": 2},
                "layer": "below",
            }
        ]
    )
    # axis titles and a square, fixed-range canvas so the 45-degree line
    # actually bisects the plot
    fig.update_layout(
        xaxis_title=r"$\text{Ranking with unlimited substitution possibilities} \quad \sigma= \infty$",
        yaxis_title=r"$\text{Ranking with limited substitution possibilities} \quad \sigma=\text{U(0,1)}$",
        font=dict(family="sans-serif"),
        autosize=False,
        width=800,
        height=800,
        yaxis=dict(range=[0, nCountries + 3]),
        xaxis=dict(range=[0, nCountries + 3]),
    )
    fig.show()
Code
# Summary table of scores and rankings for the LaTeX export.
summaryCols = [
    "Country",
    "Year",
    "scoreMean",
    "scoreStd",
    "rankMean",
    "rankStd",
    "scoreWeak",
    "rankWeak",
]
tableLatex = df[summaryCols]

# wide layout: (Year, Value) column MultiIndex, one row per country
tableLatex = tableLatex.set_index(["Country", "Year"]).stack().unstack([1, 2])
tableLatex = tableLatex.rename_axis(["Year", "Value"], axis=1)

# column order: strong scores, then strong ranks, then weak score/rank,
# each swept over the three years
pairOrder = [
    ("scoreMean", "scoreStd"),
    ("rankMean", "rankStd"),
    ("scoreWeak", "rankWeak"),
]
tableLatex = tableLatex[
    [
        (year, value)
        for pair in pairOrder
        for year in (2012, 2016, 2021)
        for value in pair
    ]
]
conceptL = ["Concept of Strong Sustainability ( σ ∼ U (0,1), N = 10,000)"] * 12 + [
    "Concept of Weak Sustainability (σ → ∞)"
] * 6

# add multiindex to tableLatex using conceptL list
# tableLatex.columns = pd.MultiIndex.from_arrays([conceptL, tableLatex.columns.levels[0],tableLatex.columns.levels[1] ],names=[["Concept", "Year", 'Value']])
tableLatex = tableLatex.sort_values(by=(2012, "rankMean"))
tableLatex = tableLatex.style.format(precision=2)
tableLatex
# print(tableLatex.to_latex(siunitx=True, hrules=True))
Year 2012 2016 2021 2012 2016 2021 2012 2016 2021
Value scoreMean scoreStd scoreMean scoreStd scoreMean scoreStd rankMean rankStd rankMean rankStd rankMean rankStd scoreWeak rankWeak scoreWeak rankWeak scoreWeak rankWeak
Country                                    
Belgium 51.97 4.94 45.70 11.89 47.11 10.90 1.00 0.74 6.00 1.78 2.00 1.63 59.02 2.00 63.11 2.00 62.28 1.00
Latvia 51.65 6.49 35.12 13.28 37.37 11.21 2.00 0.97 13.00 1.99 9.00 1.68 60.85 1.00 59.77 6.00 58.07 8.00
Estonia 48.15 6.25 48.18 7.06 49.01 9.10 3.00 0.94 4.00 1.06 1.00 1.36 58.80 3.00 59.52 7.00 61.64 2.00
Netherlands 47.96 7.10 39.85 7.95 41.91 8.32 4.00 1.07 10.00 1.19 7.00 1.25 56.70 8.00 52.88 14.00 55.45 12.00
Germany 47.18 6.26 44.05 11.00 43.94 11.05 5.00 0.94 8.00 1.65 6.00 1.66 57.47 5.00 60.08 5.00 60.59 3.00
Finland 47.05 6.92 52.22 4.86 45.42 9.20 6.00 1.04 3.00 0.73 4.00 1.38 57.12 6.00 59.41 8.00 57.67 9.00
Poland 46.31 7.93 33.88 13.69 32.64 12.76 7.00 1.19 14.00 2.05 13.00 1.91 58.44 4.00 58.95 9.00 56.13 11.00
Sweden 46.17 6.74 53.91 6.16 45.82 9.18 8.00 1.01 2.00 0.92 3.00 1.38 56.94 7.00 61.95 3.00 58.62 7.00
Ireland 46.04 5.10 0.00 0.00 0.00 0.00 9.00 0.76 15.00 0.00 15.00 0.00 54.31 13.00 47.91 15.00 49.01 15.00
Portugal 44.90 5.74 54.46 6.84 41.00 11.05 10.00 0.86 1.00 1.03 8.00 1.66 54.73 12.00 63.90 1.00 57.12 10.00
Lithuania 43.82 7.01 46.65 8.75 44.41 9.54 11.00 1.05 5.00 1.31 5.00 1.43 54.19 14.00 60.89 4.00 59.54 5.00
France 42.33 8.15 42.27 9.57 33.66 14.65 12.00 1.22 9.00 1.44 10.00 2.20 56.12 10.00 57.37 10.00 60.53 4.00
Denmark 39.93 9.36 35.25 12.73 32.88 15.03 13.00 1.40 12.00 1.91 11.00 2.25 56.09 11.00 56.38 12.00 58.78 6.00
United Kingdom 37.50 12.06 36.29 9.96 32.67 10.42 14.00 1.81 11.00 1.49 12.00 1.56 56.40 9.00 55.81 13.00 54.58 13.00
Spain 32.65 9.39 45.59 8.21 30.44 12.02 15.00 1.41 7.00 1.23 14.00 1.80 51.80 15.00 57.25 11.00 52.63 14.00

Trash bin

2. Carbon emissions per capita

Several sources (see code)

Code
# These data SHALL NOT BE USED. See reason on why ENV_AIR_GGE is preferable for the calculation:
# (https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Greenhouse_gas_emission_statistics_-_emission_inventories)
# (https://ec.europa.eu/eurostat/statistics-explained/index.php?title=Greenhouse_gas_emission_statistics_-_air_emissions_accounts&oldid=551152#Greenhouse_gas_emissions)
# CO2, KG_HAB, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/ENV_AC_AINAH_R2/

# co2 = pd.read_csv('../data/env_ac_ainah_r2.csv')
# co2 = co2[co2['geo'].isin(countryAbb)]
# co2['geo'] = co2['geo'].map(abbrev_to_country).fillna(co2['geo'])
# co2 = co2.pivot_table(columns='TIME_PERIOD', index='geo', values='OBS_VALUE', aggfunc='mean')

# mr = co2.columns[-1] # most recent year
# # co2[[2012, 2016, mr]]/1000

1. Greenhouse gas emissions under the Effort Sharing Decision (ESD)

Several sources (see code)

2. Carbon emissions per capita

Several sources (see code)

Code
# # CO2, TOTX4_MEMONIA, THS_T thousand tonnes, Eurostat
# # https://ec.europa.eu/eurostat/databrowser/view/ENV_AIR_GGE/

# co2 = pd.read_csv("../data/env_air_gge.csv")
# co2["geo"] = co2["geo"].map(abbrev_to_country).fillna(co2["geo"])
# co2 = co2[
#     co2["geo"].isin(countries)
#     & (co2.airpol == "CO2")
#     & (co2.src_crf == "TOTX4_MEMONIA")
# ]
# co2 = co2.pivot_table(
#     columns="TIME_PERIOD", index="geo", values="OBS_VALUE", aggfunc="mean"
# )

# mr = co2.columns[-1]  # most recent year
# co2pc = co2 / pop * 1000  ## tonnes co2 per capita
# co2pc.dropna(how="all", inplace=True)
# # maxMin transformation as (max-x)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best
# co2pc = (
#     (co2pc.loc[:, 2012:mr].max().max() - co2pc.loc[:, 2012:mr])
#     / (co2pc.loc[:, 2012:mr].max().max() - co2pc.loc[:, 2012:mr].min().min())
#     * 100
# ).round(2)

# co2pc[[2012, 2016, mr]]

# missingCountries(co2pc)
Code
# # Greenhouse gas emissions under the Effort Sharing Decision (ESD), Million tonnes CO2 equivalent (Mt CO2 eq), European Environment Agency
# # https://www.eea.europa.eu/data-and-maps/data/esd-4

# ghgESD = pd.read_excel("../data/EEA_ESD-GHG_2022.xlsx", sheet_name=1, skiprows=11)
# ghgESD = ghgESD[ghgESD["Geographic entity"].isin(countries)]
# ghgESD = ghgESD.set_index("Geographic entity")
# ghgESD = ghgESD.dropna(axis=1, how="all")
# negativeValues(ghgESD)
# mr = ghgESD.columns[-1]  # most recent year
# ghgESD = ghgESD * 1_000_000
Code
# # Allocation for 2020 target
# # https://ec.europa.eu/clima/ets/esdAllocations.do?languageCode=en&esdRegistry=-1&esdYear=&search=Search&currentSortSettings=
# allocation2020 = pd.read_xml(
#     "../data/esdAllocations2020.xml", xpath=".//ESDAllocations/ESDAllocationInfo"
# )
# allocation2020["Country"] = (
#     allocation2020["ESDMemberState"]
#     .map(abbrev_to_country)
#     .fillna(allocation2020["ESDMemberState"])
# )
# allocation2020 = allocation2020[allocation2020["Country"].isin(countries)]
# allocation2020 = allocation2020.pivot_table(
#     columns="ESDYear", index="Country", values="Allocation", aggfunc="mean"
# )

# # Allocation for 2030 target
# # https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32020D2126
# allocation2030 = pd.read_html("../data/esdAllocations2030.html")[13][1:]
# allocation2030.columns = allocation2030.iloc[0]
# allocation2030 = allocation2030[1:]
# allocation2030.loc[
#     allocation2030["Member State"].str.contains("Netherlands"), "Member State"
# ] = "Netherlands"
# allocation2030 = allocation2030[allocation2030["Member State"].isin(countries)]
# allocation2030.set_index("Member State", inplace=True)
# for col in allocation2030.columns:
#     allocation2030[col] = allocation2030[col].apply(
#         lambda x: str(x).replace("\xa0", "")
#     )
# allocation2030 = allocation2030.astype(int)

# # Merge 2005 values with 2020 and 2030 allocations. Interpolate for years 2006-2012
# allocationESD = ghgESD[[2005]].merge(
#     allocation2020.merge(
#         allocation2030, left_index=True, right_index=True, how="outer"
#     ),
#     left_index=True,
#     right_index=True,
#     how="outer",
# )
# allocationESD.columns = allocationESD.columns.astype(int)
# allocationESD[list(range(2006, 2013))] = np.nan  # create empty columns for 2006-2012
# allocationESD = allocationESD[list(range(2005, 2031))]
# allocationESD.interpolate(axis=1, inplace=True)

# # Calculate score for ESD
# scoreESD = 100 - (100 * (ghgESD - allocationESD) / allocationESD)
# scoreESD[scoreESD > 100] = 100
# scoreESD = scoreESD.ffill(axis=1)  # fill empty values with last available year
# scoreESD[[2013, 2016, 2021]]
# missingCountries(scoreESD)

Alternative (dif-ref) score for ESD (measuring this way does not make a lot of sense to me)

Code
# # Alternative (dif-ref) score for ESD (measuring this way does not make a lot of sense to me)

# scoreESD2020 = 100 - 100 * (
#     ghgESD.loc[:, :2020].subtract(allocationESD[2020], axis=0)
# ).divide(allocationESD[2020], axis=0)
# scoreESD2030 = 100 - 100 * (
#     ghgESD.loc[:, 2021:].subtract(allocationESD[2030], axis=0)
# ).divide(allocationESD[2030], axis=0)
# scoreESD1 = scoreESD2020.merge(scoreESD2030, left_index=True, right_index=True)
# scoreESD1[scoreESD1 > 100] = 100
# # scoreESD1[[2012, 2018, 2021]]
Code
# # Effort sharing regulation
# # Get the targets for 2020 and 2030 in percentage
# # Member State greenhouse gas emission limits in 2020 and 2030 compared to 2005 greenhouse gas emissions levels
# # There are targets for 2020 and for 2030
# # https://www.eea.europa.eu/data-and-maps/figures/national-progress-towards-greenhouse-gas
# # Official journals with the data can be found at (https://climate.ec.europa.eu/eu-action/effort-sharing-member-states-emission-targets_en)

# limitESR = pd.read_excel('../data/targetsESR/FIG2-154307-CLIM058-v2-Data.xlsx', sheet_name=1, skiprows=19, header=1, skipfooter=32)
# limitESR = limitESR.rename(columns={'Unnamed: 0':'Country', '(Resulting) ESR target 2030 (AR4)':'2030ESRtarget','ESR limit for 2020':'2020ESRtarget', '2005 ESD BJ':'2005Level'})
# limitESR = limitESR[['Country', '2020ESRtarget','2030ESRtarget','2005Level']]
# limitESR.set_index('Country', inplace=True)
# limitESR = limitESR[limitESR.index.isin(countries)]
# #UK is not in the dataset, we need to add from the official journal
# limitESR
# missingCountries(limitESR)

1. Fishing Subsidies relative to Landings

Several sources

Code
# # Landing data for Fishing Subsidies

# # load oecd abbreviation list
# with open("../data/oecdAbb.csv") as csv_file:
#     reader = csv.reader(csv_file)
#     oecdAbb = dict(reader)

# # Landings in USD
# # https://data.oecd.org/fish/fish-landings.htm

# landing = pd.read_csv("../data/fishLandingsOECD.csv")
# landing["LOCATION"] = landing["LOCATION"].map(oecdAbb).fillna(landing["LOCATION"])
# landing = landing[
#     landing["LOCATION"].isin(countries)
#     & (landing["MEASURE"] == "USD")
#     & (landing["SUBJECT"] == "TOT")
# ]
# # landing.pivot_table(columns='TIME', index='LOCATION', values='Value', aggfunc='mean')
# # landing = landing[list(range(2010, 2021))]
# landing.rename(
#     columns={"LOCATION": "Country", "TIME": "Year", "Value": "Landing"}, inplace=True
# )
# landing.set_index(["Country", "Year"], inplace=True)


# # merge subsidies-landings and calculate score
# fseLanding = pd.merge(
#     fseRisk,
#     landing,
#     how="left",
#     left_on=["Country", "Year"],
#     right_on=["Country", "Year"],
# )
# fseLanding["fseLanding"] = fseLanding.Value / fseLanding.Landing
# fseLanding = fseLanding.pivot_table(
#     columns="Year", index="Country", values="fseLanding", aggfunc="mean"
# )
# mr = fseLanding.columns[-1]  # most recent year
# # Poland is an outlier
# # fseLanding = fseLanding[~fseLanding.index.str.contains("Poland")]
# # maxMin transformation as (max-x)/(max-min) * 100 --> converts to 0-100 scale with 0=worst, 100=best

# fseScore = (
#     (fseLanding.loc[:, 2012:mr].max().max() - fseLanding.loc[:, 2012:mr])
#     / (fseLanding.loc[:, 2012:mr].max().max() - fseLanding.loc[:, 2012:mr].min().min())
#     * 100
# ).round(2)
# # fseScore = fseScore.ffill(axis=1)  # fill empty values with last available year
# fseScore[[2012, 2016, mr]]
# missingCountries(fseScore)
Code
# From 14.6, using Eurostat data for landing values.

# Even though the USD-EUR discrepancy does not affect the ratio we calculate,
# we get today's exchange rate to convert landing values to USD and have a consistent unit
# Solution source: https://stackoverflow.com/a/17250702/14534411


# r = requests.get('http://www.ecb.int/stats/eurofxref/eurofxref-daily.xml', stream=True)
# tree = ET.parse(r.raw)
# root = tree.getroot()
# namespaces = {'ex': 'http://www.ecb.int/vocabulary/2002-08-01/eurofxref'}
# currency = 'USD'
# match = root.find('.//ex:Cube[@currency="{}"]'.format(currency.upper()), namespaces=namespaces)
# eurTOusd = float(match.attrib['rate'])

# Landings of fishery products, TOTAL FISHERY PRODUCTS, Euro, Eurostat
# https://ec.europa.eu/eurostat/databrowser/view/FISH_LD_MAIN/

# landing = pd.read_csv('../data/fish_ld_main.csv')
# landing['Country'] = landing.geo.map(abbrev_to_country).fillna(landing.geo)
# landing = landing[landing.Country.isin(countries)]
# landing = landing[['Country', 'TIME_PERIOD', 'OBS_VALUE', 'unit']]
# landing['landingUSD'] = landing.OBS_VALUE * eurTOusd
# landing.pivot_table(columns='TIME_PERIOD', index='Country', values='landingUSD', aggfunc='mean')
Code
# Alternative dataset for fisheries subsidies

# https://www.sciencedirect.com/science/article/abs/pii/S0308597X16000026#bib4


# sumaila2009 = pd.read_excel("../data/subsidies2016Sumaila.xlsx")
# sumaila2009["Countries"] = sumaila2009["Countries"].str.strip()
# sumaila2009 = sumaila2009[
#     sumaila2009["Countries"].isin(countries) | sumaila2009["Countries"].isin(countryAbb)
# ]
# sumaila2009.Countries = sumaila2009.Countries.map(abbrev_to_country).fillna(
#     sumaila2009.Countries
# )
# sumaila2009 = sumaila2009[["Countries", "ReEst_Subsidy2009", "SubType"]]
# sumaila2009 = sumaila2009.groupby(["Countries"]).sum(numeric_only=True)
# # sumaila2009

# missingCountries(sumaila2009)

# sumaila2018 = pd.read_csv("../data/subsidies2019Sumaila.csv")
# sumaila2018 = sumaila2018[["Country", "Constant 2018 USD", "Type"]]
# sumaila2018 = sumaila2018[sumaila2018.Country.isin(countries)]
# sumaila2018 = sumaila2018.groupby(["Country"]).sum(numeric_only=True)
# # sumaila2018

# missingCountries(sumaila2018)

OHI Habitat subgoal (Biodiversity)

Code
# # habitat subgoal
# # https://github.com/OHI-Science/ohi-global/tree/draft/yearly_results/global2022/Results/data
# years = [2012, 2016, 2022]
# habitatOHI = pd.DataFrame()
# for year in years:
#     tempDF = pd.read_csv("../data/habitatOHI/scores_eez_{0}.csv".format(year))
#     tempDF["year"] = year
#     habitatOHI = pd.concat([habitatOHI, tempDF])
# habitatOHI = habitatOHI[habitatOHI.region_name.isin(countries)][
#     ["region_name", "year", "HAB"]
# ]
# habitatOHI = habitatOHI.pivot_table(index="region_name", columns="year", values="HAB")
# habitatOHI
# missingCountries(habitatOHI)

EEZ as per eea.europa.eu

Code
# EEZ as per the EEA (France does not include outermost regions)
# https://www.eea.europa.eu/data-and-maps/daviz/n2k-cover-in-national-marine-waters/#tab-chart_2

# eezEEA = pd.read_csv("../data/n2k-cover-in-national-marine-waters.csv")
# eezEEA["eezEEA"] = (
#     eezEEA["Natura 2000 surface area:number"]
#     / eezEEA["Natura 2000 cover in national waters:number"]
#     * 100
# )
# eezEEA.rename(columns={"Country:text": "Country"}, inplace=True)
# eezEEA.set_index("Country", inplace=True)
# eezEEA = eezEEA[(eezEEA.index.isin(countries))]
# eezEEA.eezEEA.astype(int)

MPAs as defined by IUCN

Code
# iso3 = pd.read_csv("../data/iso3.csv")
# iso3 = iso3[iso3.name.isin(countries + outermost)]
# iso3Countries = list(iso3.iso3)
# dictISO3 = iso3.set_index("iso3").to_dict()["name"]

# # Protected Planet WDPA data
# # https://www.protectedplanet.net/en/thematic-areas/wdpa?tab=WDPA
# # Metadata https://wdpa.s3-eu-west-1.amazonaws.com/WDPA_Manual/English/WDPA_WDOECM_Manual_1_6.pdf
# wdpa = pd.read_csv("../data/WDPA_Jul2023_Public_csv/WDPA_Jul2023_Public_csv.csv")
Code
# wdpaDF = wdpa[
#     (wdpa.MARINE.isin([1, 2]))
#     & (wdpa.PA_DEF == 1)
#     & (wdpa.ISO3.isin(iso3Countries + ["BLM;GLP;MAF;MTQ"]))
#     & (wdpa.STATUS.isin(["Designated"]))
#     # & (wdpa.DESIG_ENG.str.contains('Directive')) # Check for Natura 2000 sites
# ]
# mpaProtected = pd.DataFrame()
# for year in [2012, 2016, 2022]:
#     tempDF = (
#         wdpaDF[wdpaDF.STATUS_YR <= year].groupby("PARENT_ISO3").sum(numeric_only=True)
#     )
#     tempDF["year"] = year
#     mpaProtected = pd.concat([mpaProtected, tempDF])
# mpaProtected.index = mpaProtected.index.map(dictISO3)
# mpaProtected = mpaProtected.merge(eez, left_index=True, right_index=True)
# mpaProtected["mpaEEZ"] = mpaProtected.REP_M_AREA / mpaProtected.Area_km2 * 100
# mpaProtected["Score"] = 100 * (mpaProtected.mpaEEZ) / 30
# mpaProtected.loc[mpaProtected["Score"] > 100, "Score"] = 100
# mpaProtected = mpaProtected.pivot(columns="year", values="Score")
# mpaProtected
# missingCountries(mpaProtected)
Code
# # Old MPA data (similar results to Natura2k, even though they use the Protected Planet data)
# # Marine protected areas (% of territorial waters), World Bank aggregation of https://www.protectedplanet.net/en/thematic-areas/marine-protected-areas
# # https://data.worldbank.org/indicator/ER.MRN.PTMR.ZS

# mpa = pd.read_csv("../data/API_ER.MRN.PTMR.ZS_DS2.csv", skiprows=4)
# mpa = mpa[mpa["Country Name"].isin(countries)].set_index("Country Name")
# mpa = mpa.dropna(axis=1, how="all")
# mpa = mpa.drop(["Country Code", "Indicator Name", "Indicator Code"], axis=1)
# # display all rows
# negativeValues(mpa)
# mpa = (mpa / 0.3).round(2)  # dis-ref with target 30%
# mpa[mpa > 100] = 100
# mpa.sort_index(inplace=True)
# mpa[["2016", "2021"]]
# missingCountries(mpa)

Measures under the Marine Strategy Framework Directive (DROPPED)

Code
# The barplot here: https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=CELEX:52018DC0562&from=EN
# Comes from https://eur-lex.europa.eu/legal-content/EN/TXT/PDF/?uri=CELEX:52018SC0393

# The analogous report for 2020 is here https://environment.ec.europa.eu/system/files/2023-04/C_2023_2203_F1_COMMUNICATION_FROM_COMMISSION_EN_V5_P1_2532109.PDF
# But the assessment is much shorter. They refer the reader to a JRC report:
# https://publications.jrc.ec.europa.eu/repository/handle/JRC129363
# That report assesses all the descriptors, but it cannot be compared to the previous assessment.
# Moreover, the source code and data are not available.

# Overall, it is hard to make an indicator for the measures taken against pressure indicators by the MS.
# Countries report different measures and data is poor.